{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "5ea7de55",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "        <script type=\"text/javascript\">\n",
       "        window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
       "        if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
       "        if (typeof require !== 'undefined') {\n",
       "        require.undef(\"plotly\");\n",
       "        requirejs.config({\n",
       "            paths: {\n",
       "                'plotly': ['https://cdn.plot.ly/plotly-2.9.0.min']\n",
       "            }\n",
       "        });\n",
       "        require(['plotly'], function(Plotly) {\n",
       "            window._Plotly = Plotly;\n",
       "        });\n",
       "        }\n",
       "        </script>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import pandas as pd \n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt \n",
    "%matplotlib inline \n",
    "import plotly \n",
    "import plotly.graph_objects as go\n",
    "import plotly.express as pe \n",
    "import plotly.offline as po\n",
    "from plotly.offline import init_notebook_mode\n",
    "import cufflinks as cf\n",
    "from plotly.offline import iplot\n",
    "cf.go_offline()\n",
    "import warnings\n",
    "from mlxtend.plotting import plot_confusion_matrix\n",
    "from sklearn.feature_selection import SelectKBest, f_classif\n",
    "warnings.filterwarnings('ignore')\n",
    "from sklearn.impute import SimpleImputer,KNNImputer\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from sklearn.preprocessing import RobustScaler,OneHotEncoder,StandardScaler\n",
    "from sklearn.pipeline import Pipeline,make_pipeline,FeatureUnion\n",
    "from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.svm import SVC\n",
    "from xgboost import XGBClassifier\n",
    "from catboost import CatBoostClassifier\n",
    "from sklearn.cluster import KMeans\n",
    "from kneed import KneeLocator\n",
    "from imblearn.ensemble import EasyEnsembleClassifier\n",
    "from sklearn.naive_bayes import GaussianNB  \n",
    "from sklearn.compose import ColumnTransformer\n",
    "from imblearn.over_sampling import RandomOverSampler\n",
    "from imblearn.pipeline import Pipeline as ImbPipeline\n",
    "from imblearn.combine import SMOTETomek\n",
    "from sklearn.metrics import accuracy_score, classification_report,ConfusionMatrixDisplay,precision_score, recall_score, f1_score, roc_auc_score,roc_curve,confusion_matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d9a929a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install imblearn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b2df335c",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "54e0b50b",
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install kneed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "92770b92",
   "metadata": {},
   "outputs": [],
   "source": [
    "from statsmodels.stats.outliers_influence import variance_inflation_factor\n",
    "import statsmodels.api as sm\n",
    "from statsmodels.api import add_constant"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "40499269",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "b0584192",
   "metadata": {},
   "outputs": [],
   "source": [
    "df=pd.read_csv(\"Thyroid_EDA.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "f06f2dc3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>on_thyroxine</th>\n",
       "      <th>query_on_thyroxine</th>\n",
       "      <th>on_antithyroid_medication</th>\n",
       "      <th>sick</th>\n",
       "      <th>pregnant</th>\n",
       "      <th>thyroid_surgery</th>\n",
       "      <th>I131_treatment</th>\n",
       "      <th>query_hypothyroid</th>\n",
       "      <th>...</th>\n",
       "      <th>goitre</th>\n",
       "      <th>tumor</th>\n",
       "      <th>hypopituitary</th>\n",
       "      <th>psych</th>\n",
       "      <th>TSH</th>\n",
       "      <th>T3</th>\n",
       "      <th>TT4</th>\n",
       "      <th>T4U</th>\n",
       "      <th>FTI</th>\n",
       "      <th>Class</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>41.0</td>\n",
       "      <td>F</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>...</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>1.30</td>\n",
       "      <td>2.5</td>\n",
       "      <td>125.0</td>\n",
       "      <td>1.14</td>\n",
       "      <td>109.0</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>23.0</td>\n",
       "      <td>F</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>...</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>4.10</td>\n",
       "      <td>2.0</td>\n",
       "      <td>102.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>46.0</td>\n",
       "      <td>M</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>...</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>0.98</td>\n",
       "      <td>NaN</td>\n",
       "      <td>109.0</td>\n",
       "      <td>0.91</td>\n",
       "      <td>120.0</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>70.0</td>\n",
       "      <td>F</td>\n",
       "      <td>t</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>...</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>0.16</td>\n",
       "      <td>1.9</td>\n",
       "      <td>175.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>70.0</td>\n",
       "      <td>F</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>...</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>0.72</td>\n",
       "      <td>1.2</td>\n",
       "      <td>61.0</td>\n",
       "      <td>0.87</td>\n",
       "      <td>70.0</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    age sex on_thyroxine query_on_thyroxine on_antithyroid_medication sick  \\\n",
       "0  41.0   F            f                  f                         f    f   \n",
       "1  23.0   F            f                  f                         f    f   \n",
       "2  46.0   M            f                  f                         f    f   \n",
       "3  70.0   F            t                  f                         f    f   \n",
       "4  70.0   F            f                  f                         f    f   \n",
       "\n",
       "  pregnant thyroid_surgery I131_treatment query_hypothyroid  ... goitre tumor  \\\n",
       "0        f               f              f                 f  ...      f     f   \n",
       "1        f               f              f                 f  ...      f     f   \n",
       "2        f               f              f                 f  ...      f     f   \n",
       "3        f               f              f                 f  ...      f     f   \n",
       "4        f               f              f                 f  ...      f     f   \n",
       "\n",
       "  hypopituitary psych   TSH   T3    TT4   T4U    FTI     Class  \n",
       "0             f     f  1.30  2.5  125.0  1.14  109.0  negative  \n",
       "1             f     f  4.10  2.0  102.0   NaN    NaN  negative  \n",
       "2             f     f  0.98  NaN  109.0  0.91  120.0  negative  \n",
       "3             f     f  0.16  1.9  175.0   NaN    NaN  negative  \n",
       "4             f     f  0.72  1.2   61.0  0.87   70.0  negative  \n",
       "\n",
       "[5 rows x 22 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "8f417294",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "T3                           709\n",
       "T4U                          327\n",
       "FTI                          325\n",
       "TSH                          309\n",
       "TT4                          171\n",
       "sex                          149\n",
       "age                            1\n",
       "psych                          0\n",
       "hypopituitary                  0\n",
       "tumor                          0\n",
       "goitre                         0\n",
       "lithium                        0\n",
       "query_hyperthyroid             0\n",
       "query_hypothyroid              0\n",
       "I131_treatment                 0\n",
       "thyroid_surgery                0\n",
       "pregnant                       0\n",
       "sick                           0\n",
       "on_antithyroid_medication      0\n",
       "query_on_thyroxine             0\n",
       "on_thyroxine                   0\n",
       "Class                          0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.isna().sum().sort_values(ascending=False)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "ae2b4888",
   "metadata": {},
   "source": [
    "# Checking missing values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "1bf3df34",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA34AAAEnCAYAAAD2N5ECAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAX40lEQVR4nO3dfbBtZX0f8O9PEOMLRpGDInK8xDB0MFW0d1BC28HXwsWIyVgKJoov06tJfKsmEfuiaSbpkLbajmLE20DFKaJmFKVyK9LUikx8A4oCAkIQx+tlQKMCVqtc/PWPs29zetzn3s1lr3O4i89nZs9e63metZ/f/vM7z1rPqu4OAAAA4/Wg9S4AAACAYQl+AAAAIyf4AQAAjJzgBwAAMHKCHwAAwMgJfgAAACM3WPCrqkOr6jNVdV1VXVtVb5i0H1BVl1TVjZPvR69y/fFVdUNV3VRVpw9VJwAAwNjVUO/xq6qDkxzc3VdW1f5JrkjyoiQvT/K97j5jEuge3d1vWXHtPkm+nuR5SbYl+XKSU7v7a4MUCwAAMGKDrfh1963dfeXk+K4k1yU5JMlJSc6dDDs3S2FwpaOT3NTdN3f3T5N8aHIdAAAA99K+azFJVW1I8rQkX0zy2O6+NVkKh1V10JRLDknyrWXn25I8Y3fzHHjggb1hw4b7XC8AAMDe6Iorrvhudy+sbB88+FXVI5J8NMkbu/vOqprpsiltU+9JrarNSTYnyeLiYi6//PI9LRUAAGCvVlXfnNY+6K6eVfXgLIW+87r7Y5Pm2ybP/+18DvD2KZduS3LosvMnJNk+bY7u3tLdG7t748LCzwVbAACAB7whd/WsJGcnua6737ms68Ikp02OT0vyiSmXfznJ4VV1WFXtl+SUyXUAAADcS0Ou+B2b5KVJnl1VV00+m5KckeR5VXVjlnbtPCNJqurxVbU1Sbp7R5LXJrk4S5vCfKS7rx2wVgAAgNEa7Bm/7r4s05/VS5LnTBm/PcmmZedbk2wdpjoAAIAHjkGf8QMAAGD9CX4AAAAjJ/gBAACMnOAHAAAwcoIfAADAyAl+AAAAIyf4AQAAjNxg7/Hjb204/aI1ne+WM05c0/kAAID7Nyt+AAAAIyf4AQAAjJzgBwAAMHKCHwAAwMgJfgAAACMn+AEAAIyc4AcAADBygh8AAMDICX4AAAAjJ/gBAACMnOAHAAAwcoIfAADAyAl+AAAAIyf4AQAAjJzgBwAAMHKCHwAAwMgJfgAAACO371A/XFXnJHlBktu7+1cmbR9OcsRkyKOS/KC7j5py7S1J7kpyT5Id3b1xqDoBAADGbrDgl+T9Sc5M8oGdDd39T3YeV9U7ktyxi+uf1d3fHaw6AACAB4jBgl93X1pVG6b1VVUlOTnJs4eaHwAAgCXr9YzfP0hyW3ffuEp/J/l0VV1RVZvXsC4AAIDRGfJWz105Ncn5u+g/tru3V9VBSS6pquu7+9JpAyfBcHOSLC4uzr9SAACAvdyar/hV1b5JfiPJh1cb093bJ9+3J7kgydG7GLuluzd298aFhYV5lwsAALDXW49bPZ+b5Pru3jats6oeXlX77zxO8vwk16xhfQAAAKMyWPCrqvOTfD7JEVW1rapeNek6JStu86yqx1fV1snpY5NcVlVfSfKlJBd196eGqhMAAGDshtzV89RV2l8+pW17kk2T45uTPHWougAAAB5o1mtXTwAAANaI4AcAADBygh8AAMDICX4AAAAjJ/gBAACMnOAHAAAwcoIfAADAyAl+AAAAIyf4AQAAjJzgBwAAMHKCHwAAwMgJfgAAACMn+AEAAIyc4AcAADBygh8AAMDICX4AAAAjJ/gBAACMnOAHAAAwcoIfAADAyAl+AAAAIyf4AQAAjJzgBwAAMHKCHwAAwMgJfgAAACMn+AEAAIzcYMGvqs6pqtur6pplbX9YVd+uqqsmn02rXHt8Vd1QVTdV1elD1QgAAPBAMOSK3/uTHD+l/T9091GTz9aVnVW1T5L3JDkhyZFJTq2qIwesEwAAYNQGC37dfWmS7+3BpUcnuam7b+7unyb5UJKT5locAADAA8h6POP32qr66uRW0EdP6T8kybeWnW+btAEAALAH1jr4vTfJk5IcleTWJO+YMqamtPVqP1hVm6vq8qq6/Dvf+c5cigQAABiTNQ1+3X1bd9/T3T9L8p+ydFvnStuSHLrs/AlJtu/iN7d098bu3riwsDDfggEAAEZgTYNfVR287PTXk1wzZdiXkxxeVYdV1X5JTkly4VrUBwAAMEb7DvXDVXV+kuOSHFhV25K8PclxVXVUlm7dvCXJqydjH5/kz7t7U3fvqKrXJrk4yT5Jzunua4eqEwAAYOwGC37dfeqU5rNXGbs9yaZl51uT/NyrHgAAALj31mNXTwAAANaQ4AcAADBygh8AAMDICX4AAAAjJ/gBAACMnOAHAAAwcoIfAADAyAl+AAAAIyf4AQAAjJzgBwAAMHKCHwAAwMgJfgAAACMn+AEAAIyc4AcAADBygh8AAMDI7bu7AVV1V5Je0XxHksuTvLm7bx6iMAAAAOZjt8EvyTuTbE/ywSSV5JQkj0tyQ5Jzkhw3VHEAAADcd7Pc6nl8d7+vu+/q7ju7e0uSTd394SSPHrg+AAAA7qNZgt/PqurkqnrQ5HPysr6Vt4ACAABwPzNL8PvNJC9NcnuS2ybHv1VVD03y2gFrAwAAYA52+4zfZPOWX1ul+7L5lgMAAMC8zbKr50KSf5pkw/Lx3f3K4coCAABgXmbZ1fMTST6X5L8nuWfYcgAAAJi3WYLfw7r7LYNXAgAAwCBm2dzlk1W1afBKAAAAGMQswe8NWQp/P66qO6vqrqq6c3cXVdU5VXV7VV2zrO3fVdX1VfXVqrqgqh61yrW3VNXVVXVVVV0+878BAADg5+w2+HX3/t39oO5+aHc/cnL+yBl++/1Jjl/RdkmSX+nupyT5epK37uL6Z3X3Ud29cYa5AAAAWMWqz/hV1d/p7uur6unT+rv7yl39cHdfWlUbVrR9etnpF5K8+F7UCgAAwB7Y1eYub0qyOck7pvR1kmffx7lfmeTDq/R1kk9XVSd5X3dvWe1HqmrzpM4sLi7ex5IAAADGZ9Xg192bJ9/PmvekVfUvkuxIct4qQ47t7u1VdVCSS6rq+u6+dJU6tyTZkiQbN27sedcKAACwt9vtM35V9Y+rav/J8b+sqo9V1dP2dMKqOi3JC5L8ZndPDWrdvX3yfXuSC5IcvafzAQAAPNDNsqvnv+ruu6rq7yf5R0nOTXLWnkxWVccneUuSF3b3j1YZ8/BlQfPhSZ6f5JppYwEAANi9WYLfPZPvE5O8t7s/kWS/3V1UVecn+XySI6pqW1W9KsmZSfbP0u2bV1XVWZOxj6+qrZNLH5vksqr6SpIvJbmouz91r/4VAAAA/8+uNnfZ6dtV9b4kz03yp1X1kMz2GohTpzSfvcrY7Uk2TY5vTvLUGeoCAABgBrOs+J2c5OIkx3f3D5IckOT3hywKAACA+Zllxe/gLN1u+ZOqOi7JU5J8YMiiAAAAmJ9ZVvw+muSeqvrlLN2qeViSDw5aFQAAAHMzS/D7WXfvSPIbSf5jd/+zLK0CAgAAsBeYJfjdXVWnJnlZkk9O2h48XEkAAADM0yzB7xVJjknyJ939jao6LMl/GbYsAAAA5mW3m7t099eSvH7Z+TeSnDFkUQAAAMzPqsGvqj7S3SdX1dVJenlXku7upwxeHQAAAPfZrlb83jD5fsFaFAIAAMAwVg1+3X3r5PubSVJVj9zVeAAAAO6fdhvkqurVSf4oyY/zt7d8dpJfGrAuAAAA5mSWFbzfS/Lk7v7u0MUAAAAwf7O8zuGvk/xo6EIAAAAYxiwrfm9N8ldV9cUkP9nZ2N2vX/0SAAAA7i9mCX7vS/I/klyd5GfDlgMAAMC8zRL8dnT3mwavBAAAgEHM8ozfZ6pqc1UdXFUH7PwMXhkAAABzMcuK30sm329d1uZ1DgAAAHuJ3Qa/7j5sLQoBAABgGLPc6gkAAMBeTPADAAAYOcEPAABg5HYb/GrJb1XV2ybni1V19PClAQAAMA+zrPj9WZJjkpw6Ob8ryXsGqwgAAIC5muV1Ds/o7qdX1f9Kku7+flXtN3BdAAAAzMksK353V9U+WXp3X6pqIcnPdndRVZ1TVbdX1TXL2g6oqkuq6sbJ96NXufb4qrqhqm6qqtNn/C8AAABMMUvwe1eSC5IcVFV/kuSyJP9mhuven+T4FW2nJ/nL7j48yV9Ozv8/k5D5niQnJDkyyalVdeQM8wEAADDFLC9wP6+qrkjynCSV5EXdfd0M111aVRtWNJ+U5LjJ8blJ/meSt6wYc3SSm7r75iSpqg9Nrvva7uYEAADg5+02+FXVAUluT3L+srYHd/fdezDfY7v71iTp7lur6qApYw5J8q1l59uSPGMX9W1OsjlJFhcX96AkAACAcZvlVs8rk3wnydeT3Dg5/kZVXVlVf2+AmmpKW682uLu3dPfG7t64sLAwQDkAAAB7t1mC36eSbOruA7v7MVl69u4jSX4nS696uDduq6qDk2TyffuUMduSHLrs/AlJtt/LeQAAAJiYJfht7O6Ld55096eT/MPu/kKSh9zL+S5Mctrk+LQkn5gy5stJDq+qwyavjThlch0AAAB7YJbg972qektVPXHy+YMk35/svrnqax2q6vwkn09yRFVtq6pXJTkjyfOq6sYkz5ucp6oeX1Vbk6S7dyR5bZKLk1yX5CPdfe19+I8AAAAPaLO8wP0lSd6e5ONZev7usknbPklOXu2i7j51la7nTBm7PcmmZedbk2ydoTYAAAB2Y5bXOXw3yetW6b5pvuUAAAAwb7O8zmEhyR8keXKSX9jZ3t3PHrAuAAAA5mSWZ/zOS3J9ksOS/Oskt2RpAxYAAAD2ArMEv8d099lJ7u7uz3b3K5M8c+C6AAAAmJNZNne5e/J9a1WdmKV36j1huJIAAACYp1mC3x9X1S8meXOSdyd5ZJI3DlkUAAAA8zNL8Pt+d9+R5I4kz0qSqjp20KoAAACYm1me8Xv3jG0AAADcD6264ldVxyT51SQLVfWmZV2PzNLL2wEAANgL7OpWz/2SPGIyZv9l7XcmefGQRQEAADA/qwa/7v5sks9W1fu7+5trWBMD2nD6RWs21y1nnLhmcwEAAKubZXOXh1TVliQblo/v7mcPVRTjt5YBNBFCAQB4YJsl+P1FkrOS/HmSe4YtBwAAgHmbJfjt6O73Dl4JAAAAg5jldQ7/tap+p6oOrqoDdn4GrwwAAIC5mGXF77TJ9+8va+skvzT/cgAAAJi33Qa/7j5sLQoBAABgGLsNflX1sCRvSrLY3Zur6vAkR3T3JwevDtaAHUYBABi7WZ7x+89JfprkVyfn25L88WAVAQAAMFezBL8ndfe/TXJ3knT3j5PUoFUBAAAwN7MEv59W1UOztKFLqupJSX4yaFUAAADMzSy7er49yaeSHFpV5yU5NsnLhywKAACA+ZllV89LqurKJM/M0i2eb+ju7w5eGQAAAHOx21s9q+rXk+zo7osmO3nuqKoXDV4ZAAAAczHLM35v7+47dp509w+ydPsnAAAAe4FZgt+0MbM8GzhVVR1RVVct+9xZVW9cMea4qrpj2Zi37el8AAAAD3SzBLjLq+qdSd6TpZ09X5fkij2dsLtvSHJUklTVPkm+neSCKUM/190v2NN5AAAAWDLLit/rsvQC9w8n+UiSHyf53TnN/5wkf93d35zT7wEAALDCLlf8Jityn+ju5w40/ylJzl+l75iq+kqS7Ul+r7uvXaXGzUk2J8ni4uIgRQIAAOzNdrni1933JPlRVf3ivCeuqv2SvDDJX0zpvjLJE7v7qUneneTju6hxS3dv7O6NCwsL8y4TAABgrzfLM37/J8nVVXVJkv+9s7G7X38f5z4hyZXdfdvKju6+c9nx1qr6s6o60PsDAQAA7r1Zgt9Fk8+8nZpVbvOsqsclua27u6qOztLK5N8MUAMAAMDo7Tb4dfe5VfXQJIuTHTnvs6p6WJLnJXn1srbXTOY7K8mLk/x2Ve3I0mYyp3R3z2NuAACAB5rdBr+q+rUk/z7JfkkOq6qjkvxRd79wTyft7h8lecyKtrOWHZ+Z5Mw9/X0AAAD+1iyvc/jDJEcn+UGSdPdVSQ4brCIAAADmapbgt6O771jR5rZLAACAvcQsm7tcU1UvSbJPVR2e5PVJ/mrYsgAAAJiXWVb8XpfkyUl+kuSDSe5I8sYBawIAAGCOVl3xq6pfSPKaJL+c5Ookx3T3jrUqDAAAgPnY1YrfuUk2Zin0nZClnT0BAADYy+zqGb8ju/vvJklVnZ3kS2tTEgAAAPO0qxW/u3ceuMUTAABg77WrFb+nVtWdk+NK8tDJeSXp7n7k4NUBAABwn60a/Lp7n7UsBAAAgGHM8joHAAAA9mKCHwAAwMgJfgAAACMn+AEAAIyc4AcAADBygh8AAMDICX4AAAAjJ/gBAACMnOAHAAAwcoIfAADAyAl+AAAAIyf4AQAAjJzgBwAAMHKCHwAAwMitS/Crqluq6uqquqqqLp/SX1X1rqq6qaq+WlVPX486AQAAxmDfdZz7Wd393VX6Tkhy+OTzjCTvnXwDAABwL91fb/U8KckHeskXkjyqqg5e76IAAAD2RusV/DrJp6vqiqraPKX/kCTfWna+bdIGAADAvbRet3oe293bq+qgJJdU1fXdfemy/ppyTU/7oUlw3Jwki4uL868UAABgL7cuK37dvX3yfXuSC5IcvWLItiSHLjt/QpLtq/zWlu7e2N0bFxYWhigXAABgr7bmwa+qHl5V++88TvL8JNesGHZhkpdNdvd8ZpI7uvvWNS4VAABgFNbjVs/HJrmgqnbO/8Hu/lRVvSZJuvusJFuTbEpyU5IfJXnFOtQJAAAwCmse/Lr75iRPndJ+1rLjTvK7a1kXAADAWK3ne/yAFTacftGaznfLGSeu6XwAAKyP++t7/AAAAJgTwQ8AAGDkBD8AAICRE/wAAABGTvADAAAYOcEPAABg5AQ/AACAkfMeP2Aq7xQEABgPK34AAAAjJ/gBAACMnOAHAAAwcoIfAADAyAl+AAAAIyf4AQAAjJzgBwAAMHKCHwAAwMgJfgAAACMn+AEAAIyc4AcAADBygh8AAMDICX4AAAAjJ/gBAACMnOAHAAAwcoIfAADAyK158KuqQ6vqM1V1XVVdW1VvmDLmuKq6o6qumnzettZ1AgAAjMW+6zDnjiRv7u4rq2r/JFdU1SXd/bUV4z7X3S9Yh/oAAABGZc1X/Lr71u6+cnJ8V5Lrkhyy1nUAAAA8UKzrM35VtSHJ05J8cUr3MVX1lar6b1X15LWtDAAAYDzW41bPJElVPSLJR5O8sbvvXNF9ZZIndvcPq2pTko8nOXyV39mcZHOSLC4uDlcwAADAXmpdVvyq6sFZCn3ndffHVvZ3953d/cPJ8dYkD66qA6f9Vndv6e6N3b1xYWFh0LoBAAD2Ruuxq2clOTvJdd39zlXGPG4yLlV1dJbq/Ju1qxIAAGA81uNWz2OTvDTJ1VV11aTtnydZTJLuPivJi5P8dlXtSPLjJKd0d69DrQAAAHu9NQ9+3X1ZktrNmDOTnLk2FQEAAIzbuu7qCQAAwPAEPwAAgJET/AAAAEZO8AMAABg5wQ8AAGDkBD8AAICRE/wAAABGTvADAAAYOcEPAABg5AQ/AACAkRP8AAAARk7wAwAAGDnBDwAAYOQEPwAAgJET/AAAAEZO8AMAABg5wQ8AAGDkBD8AAICRE/wAAABGTvADAAAYOcEPAABg5AQ/AACAkRP8AAAARk7wAwAAGDnBDwAAYOTWJfhV1fFVdUNV3VRVp0/pr6p616T/q1X19PWoEwAAYAzWPPhV1T5J3pPkhCRHJjm1qo5cMeyEJIdPPpuTvHdNiwQAABiR9VjxOzrJTd19c3f/NMmHkpy0YsxJST7QS76Q5FFVdfBaFwoAADAG+67DnIck+day821JnjHDmEOS3DpsacD90YbTL1rT+W4548Q1nQ8AYGjrEfxqSlvvwZilgVWbs3Q7aJL8sKpuuA+1jUL96XpXsOT+UkeiltWoZbr7Uy0AAPfSE6c1rkfw25bk0GXnT0iyfQ/GJEm6e0uSLfMsEAAAYEzW4xm/Lyc5vKoOq6r9kpyS5MIVYy5M8rLJ7p7PTHJHd7vNEwAAYA+s+Ypfd++oqtcmuTjJPknO6e5rq+o1k/6zkmxNsinJTUl+lOQVa10nAADAWFT31EfnAAAAGIl1eYE7AAAAa0fwAwAAGDnBDwAAYOQEPwAAgJET/AAAAEZO8AMAABg5wQ8AAGDkBD8AAICR+785vILTaw1XvwAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 1080x360 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plotting Missing values count for each column\n",
    "fig, ax = plt.subplots(figsize=(15,5))\n",
    "\n",
    "missing = df.isna().sum().div(df.shape[0]).mul(100).to_frame().sort_values(by=0, ascending = False)\n",
    "\n",
    "ax.bar(missing.index, missing.values.T[0])\n",
    "plt.xticks([])\n",
    "plt.ylabel(\"Percentage missing\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "f61cc62a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Percentage of total missing cells in the data 2.4386957693344113%\n"
     ]
    }
   ],
   "source": [
    "missing_values_count= df.isnull().sum()\n",
    "total_cells = np.product(df.shape)\n",
    "total_missing = missing_values_count.sum()\n",
    "\n",
    "# percent of data that is missing\n",
    "print(f\"Percentage of total missing cells in the data {(total_missing/total_cells) * 100}%\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "ceebfad1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "negative                   3420\n",
       "compensated_hypothyroid     194\n",
       "primary_hypothyroid          95\n",
       "secondary_hypothyroid         2\n",
       "Name: Class, dtype: int64"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['Class'].value_counts()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "2c4e7ac7",
   "metadata": {},
   "source": [
    "# Visualization of unique values in Target variable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "9202235a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.plotly.v1+json": {
       "config": {
        "plotlyServerURL": "https://plot.ly"
       },
       "data": [
        {
         "alignmentgroup": "True",
         "hovertemplate": "x=%{x}<br>y=%{y}<extra></extra>",
         "legendgroup": "",
         "marker": {
          "color": "#636efa",
          "pattern": {
           "shape": ""
          }
         },
         "name": "",
         "offsetgroup": "",
         "orientation": "v",
         "showlegend": false,
         "textposition": "auto",
         "type": "bar",
         "x": [
          "negative",
          "compensated_hypothyroid",
          "primary_hypothyroid",
          "secondary_hypothyroid"
         ],
         "xaxis": "x",
         "y": [
          3420,
          194,
          95,
          2
         ],
         "yaxis": "y"
        }
       ],
       "layout": {
        "barmode": "relative",
        "legend": {
         "tracegroupgap": 0
        },
        "margin": {
         "t": 60
        },
        "template": {
         "data": {
          "bar": [
           {
            "error_x": {
             "color": "#2a3f5f"
            },
            "error_y": {
             "color": "#2a3f5f"
            },
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "bar"
           }
          ],
          "barpolar": [
           {
            "marker": {
             "line": {
              "color": "#E5ECF6",
              "width": 0.5
             },
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "barpolar"
           }
          ],
          "carpet": [
           {
            "aaxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "baxis": {
             "endlinecolor": "#2a3f5f",
             "gridcolor": "white",
             "linecolor": "white",
             "minorgridcolor": "white",
             "startlinecolor": "#2a3f5f"
            },
            "type": "carpet"
           }
          ],
          "choropleth": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "choropleth"
           }
          ],
          "contour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "contour"
           }
          ],
          "contourcarpet": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "contourcarpet"
           }
          ],
          "heatmap": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmap"
           }
          ],
          "heatmapgl": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "heatmapgl"
           }
          ],
          "histogram": [
           {
            "marker": {
             "pattern": {
              "fillmode": "overlay",
              "size": 10,
              "solidity": 0.2
             }
            },
            "type": "histogram"
           }
          ],
          "histogram2d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2d"
           }
          ],
          "histogram2dcontour": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "histogram2dcontour"
           }
          ],
          "mesh3d": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "type": "mesh3d"
           }
          ],
          "parcoords": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "parcoords"
           }
          ],
          "pie": [
           {
            "automargin": true,
            "type": "pie"
           }
          ],
          "scatter": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatter"
           }
          ],
          "scatter3d": [
           {
            "line": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatter3d"
           }
          ],
          "scattercarpet": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattercarpet"
           }
          ],
          "scattergeo": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergeo"
           }
          ],
          "scattergl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattergl"
           }
          ],
          "scattermapbox": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scattermapbox"
           }
          ],
          "scatterpolar": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolar"
           }
          ],
          "scatterpolargl": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterpolargl"
           }
          ],
          "scatterternary": [
           {
            "marker": {
             "colorbar": {
              "outlinewidth": 0,
              "ticks": ""
             }
            },
            "type": "scatterternary"
           }
          ],
          "surface": [
           {
            "colorbar": {
             "outlinewidth": 0,
             "ticks": ""
            },
            "colorscale": [
             [
              0,
              "#0d0887"
             ],
             [
              0.1111111111111111,
              "#46039f"
             ],
             [
              0.2222222222222222,
              "#7201a8"
             ],
             [
              0.3333333333333333,
              "#9c179e"
             ],
             [
              0.4444444444444444,
              "#bd3786"
             ],
             [
              0.5555555555555556,
              "#d8576b"
             ],
             [
              0.6666666666666666,
              "#ed7953"
             ],
             [
              0.7777777777777778,
              "#fb9f3a"
             ],
             [
              0.8888888888888888,
              "#fdca26"
             ],
             [
              1,
              "#f0f921"
             ]
            ],
            "type": "surface"
           }
          ],
          "table": [
           {
            "cells": {
             "fill": {
              "color": "#EBF0F8"
             },
             "line": {
              "color": "white"
             }
            },
            "header": {
             "fill": {
              "color": "#C8D4E3"
             },
             "line": {
              "color": "white"
             }
            },
            "type": "table"
           }
          ]
         },
         "layout": {
          "annotationdefaults": {
           "arrowcolor": "#2a3f5f",
           "arrowhead": 0,
           "arrowwidth": 1
          },
          "autotypenumbers": "strict",
          "coloraxis": {
           "colorbar": {
            "outlinewidth": 0,
            "ticks": ""
           }
          },
          "colorscale": {
           "diverging": [
            [
             0,
             "#8e0152"
            ],
            [
             0.1,
             "#c51b7d"
            ],
            [
             0.2,
             "#de77ae"
            ],
            [
             0.3,
             "#f1b6da"
            ],
            [
             0.4,
             "#fde0ef"
            ],
            [
             0.5,
             "#f7f7f7"
            ],
            [
             0.6,
             "#e6f5d0"
            ],
            [
             0.7,
             "#b8e186"
            ],
            [
             0.8,
             "#7fbc41"
            ],
            [
             0.9,
             "#4d9221"
            ],
            [
             1,
             "#276419"
            ]
           ],
           "sequential": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ],
           "sequentialminus": [
            [
             0,
             "#0d0887"
            ],
            [
             0.1111111111111111,
             "#46039f"
            ],
            [
             0.2222222222222222,
             "#7201a8"
            ],
            [
             0.3333333333333333,
             "#9c179e"
            ],
            [
             0.4444444444444444,
             "#bd3786"
            ],
            [
             0.5555555555555556,
             "#d8576b"
            ],
            [
             0.6666666666666666,
             "#ed7953"
            ],
            [
             0.7777777777777778,
             "#fb9f3a"
            ],
            [
             0.8888888888888888,
             "#fdca26"
            ],
            [
             1,
             "#f0f921"
            ]
           ]
          },
          "colorway": [
           "#636efa",
           "#EF553B",
           "#00cc96",
           "#ab63fa",
           "#FFA15A",
           "#19d3f3",
           "#FF6692",
           "#B6E880",
           "#FF97FF",
           "#FECB52"
          ],
          "font": {
           "color": "#2a3f5f"
          },
          "geo": {
           "bgcolor": "white",
           "lakecolor": "white",
           "landcolor": "#E5ECF6",
           "showlakes": true,
           "showland": true,
           "subunitcolor": "white"
          },
          "hoverlabel": {
           "align": "left"
          },
          "hovermode": "closest",
          "mapbox": {
           "style": "light"
          },
          "paper_bgcolor": "white",
          "plot_bgcolor": "#E5ECF6",
          "polar": {
           "angularaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "radialaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "scene": {
           "xaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "yaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           },
           "zaxis": {
            "backgroundcolor": "#E5ECF6",
            "gridcolor": "white",
            "gridwidth": 2,
            "linecolor": "white",
            "showbackground": true,
            "ticks": "",
            "zerolinecolor": "white"
           }
          },
          "shapedefaults": {
           "line": {
            "color": "#2a3f5f"
           }
          },
          "ternary": {
           "aaxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "baxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           },
           "bgcolor": "#E5ECF6",
           "caxis": {
            "gridcolor": "white",
            "linecolor": "white",
            "ticks": ""
           }
          },
          "title": {
           "x": 0.05
          },
          "xaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          },
          "yaxis": {
           "automargin": true,
           "gridcolor": "white",
           "linecolor": "white",
           "ticks": "",
           "title": {
            "standoff": 15
           },
           "zerolinecolor": "white",
           "zerolinewidth": 2
          }
         }
        },
        "xaxis": {
         "anchor": "y",
         "domain": [
          0,
          1
         ],
         "title": {
          "text": "x"
         }
        },
        "yaxis": {
         "anchor": "x",
         "domain": [
          0,
          1
         ],
         "title": {
          "text": "y"
         }
        }
       }
      },
      "text/html": [
       "<div>                            <div id=\"52d40c9b-d8c6-489a-8d3f-d0a6193a7748\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                require([\"plotly\"], function(Plotly) {                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"52d40c9b-d8c6-489a-8d3f-d0a6193a7748\")) {                    Plotly.newPlot(                        \"52d40c9b-d8c6-489a-8d3f-d0a6193a7748\",                        [{\"alignmentgroup\":\"True\",\"hovertemplate\":\"x=%{x}<br>y=%{y}<extra></extra>\",\"legendgroup\":\"\",\"marker\":{\"color\":\"#636efa\",\"pattern\":{\"shape\":\"\"}},\"name\":\"\",\"offsetgroup\":\"\",\"orientation\":\"v\",\"showlegend\":false,\"textposition\":\"auto\",\"x\":[\"negative\",\"compensated_hypothyroid\",\"primary_hypothyroid\",\"secondary_hypothyroid\"],\"xaxis\":\"x\",\"y\":[3420,194,95,2],\"yaxis\":\"y\",\"type\":\"bar\"}],                        {\"template\":{\"data\":{\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"choropleth\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"type\":\"choropleth\"}],\"contour\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"contour\"}],\"contourcarpet\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"type\":\"contourcarpet\"}],\"heatmap\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"heatmap\"}],\"heatmapgl\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"heatmapgl\"}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"histogram2d\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"histogram2d\"}],\"histogram2dcontour\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"histogram2dcontour\"}],\"mesh3d\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"type\":\"mesh3d\"}],\"parcoords\":[{\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"parcoords\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}],\"scatter\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scatter\"}],\"scatter3d\":[{\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scatter3d\"}],\"scattercarpet\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scattercarpet\"}],\"scattergeo\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scattergeo\"}],\"scattergl\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scattergl\"}],\"scattermapbox\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scattermapbox\"}],\"scatterpolar\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scatterpolar\"}],\"scatterpolargl\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scatterpolargl\"}],\"scatterternary\":[{\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"type\":\"scatterternary\"}],\"surface\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"surface\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}]},\"layout\":{\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"autotypenumbers\":\"strict\",\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]],\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]},\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"geo\":{\"bgcolor\":\"white\",\"lakecolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"showlakes\":true,\"showland\":true,\"subunitcolor\":\"white\"},\"hoverlabel\":{\"align\":\"left\"},\"hovermode\":\"closest\",\"mapbox\":{\"style\":\"light\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"bgcolor\":\"#E5ECF6\",\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"gridwidth\":2,\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\"},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"gridwidth\":2,\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\"},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"gridwidth\":2,\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\"}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"ternary\":{\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"bgcolor\":\"#E5ECF6\",\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"title\":{\"x\":0.05},\"xaxis\":{\"automargin\":true,\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"zerolinewidth\":2},\"yaxis\":{\"automargin\":true,\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"zerolinewidth\":2}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,1.0],\"title\":{\"text\":\"x\"}},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"title\":{\"text\":\"y\"}},\"legend\":{\"tracegroupgap\":0},\"margin\":{\"t\":60},\"barmode\":\"relative\"},                        {\"responsive\": true}                    ).then(function(){\n",
       "                            \n",
       "var gd = document.getElementById('52d40c9b-d8c6-489a-8d3f-d0a6193a7748');\n",
       "var x = new MutationObserver(function (mutations, observer) {{\n",
       "        var display = window.getComputedStyle(gd).display;\n",
       "        if (!display || display === 'none') {{\n",
       "            console.log([gd, 'removed!']);\n",
       "            Plotly.purge(gd);\n",
       "            observer.disconnect();\n",
       "        }}\n",
       "}});\n",
       "\n",
       "// Listen for the removal of the full notebook cells\n",
       "var notebookContainer = gd.closest('#notebook-container');\n",
       "if (notebookContainer) {{\n",
       "    x.observe(notebookContainer, {childList: true});\n",
       "}}\n",
       "\n",
       "// Listen for the clearing of the current output cell\n",
       "var outputEl = gd.closest('.output');\n",
       "if (outputEl) {{\n",
       "    x.observe(outputEl, {childList: true});\n",
       "}}\n",
       "\n",
       "                        })                };                });            </script>        </div>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "negative = df[df['Class']=='negative'].shape[0]\n",
    "compensated_hypothyroid= df[df['Class']=='compensated_hypothyroid'].shape[0]\n",
    "primary_hypothyroid=df[df['Class']=='primary_hypothyroid'].shape[0]\n",
    "secondary_hypothyroid=df[df['Class']=='secondary_hypothyroid'].shape[0]\n",
    "pe.bar(df,x=df['Class'].value_counts().index,y=df['Class'].value_counts().values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "d6e75911",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['sex', 'on_thyroxine', 'query_on_thyroxine', 'on_antithyroid_medication', 'sick', 'pregnant', 'thyroid_surgery', 'I131_treatment', 'query_hypothyroid', 'query_hyperthyroid', 'lithium', 'goitre', 'tumor', 'hypopituitary', 'psych', 'Class']\n"
     ]
    }
   ],
   "source": [
    "Categorical_Features=[cols for cols in df.columns if df[cols].dtypes=='O']\n",
    "print(Categorical_Features)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "0f0bd26b",
   "metadata": {},
   "outputs": [],
   "source": [
    "Categorical_Features.remove('Class')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "150faec2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['sex',\n",
       " 'on_thyroxine',\n",
       " 'query_on_thyroxine',\n",
       " 'on_antithyroid_medication',\n",
       " 'sick',\n",
       " 'pregnant',\n",
       " 'thyroid_surgery',\n",
       " 'I131_treatment',\n",
       " 'query_hypothyroid',\n",
       " 'query_hyperthyroid',\n",
       " 'lithium',\n",
       " 'goitre',\n",
       " 'tumor',\n",
       " 'hypopituitary',\n",
       " 'psych']"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Categorical_Features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "52adcfab",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['age', 'TSH', 'T3', 'TT4', 'T4U', 'FTI']\n"
     ]
    }
   ],
   "source": [
    "Numerical_Features=[cols for cols in df.columns if df[cols].dtypes!='O']\n",
    "print(Numerical_Features)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "8a9adf61",
   "metadata": {},
   "source": [
    "Columns containing missing values are T3 ,T4U ,FTI ,TSH ,TT4 ,sex ,age from them one is categorical feature and others are numerical feature"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "73ec2cf9",
   "metadata": {},
   "source": [
    "lets do the separation of target and independent features "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "67b54cb3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3711, 21)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X=df.drop('Class',axis=1)\n",
    "X.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "d59ad6e4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['sex', 'on_thyroxine', 'query_on_thyroxine', 'on_antithyroid_medication', 'sick', 'pregnant', 'thyroid_surgery', 'I131_treatment', 'query_hypothyroid', 'query_hyperthyroid', 'lithium', 'goitre', 'tumor', 'hypopituitary', 'psych']\n"
     ]
    }
   ],
   "source": [
    "Categorical_Features=[cols for cols in X.columns if X[cols].dtypes=='O']\n",
    "print(Categorical_Features)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "c7e8c9f7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['age', 'TSH', 'T3', 'TT4', 'T4U', 'FTI']\n"
     ]
    }
   ],
   "source": [
    "Numerical_Features=[cols for cols in X.columns if X[cols].dtypes!='O']\n",
    "print(Numerical_Features)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "9dda8090",
   "metadata": {},
   "source": [
    "# Label Encoding and saving encoded label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "80142c2e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import joblib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "820caffe",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3711,)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y=df['Class']\n",
    "y.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "2e6469a1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['label_encoder.joblib']"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Initialize the label encoder\n",
    "label_encoder = LabelEncoder()\n",
    "\n",
    "# Fit and transform the target variable\n",
    "y_encoded = label_encoder.fit_transform(y)\n",
    "\n",
    "# Save the label encoder using joblib\n",
    "import joblib\n",
    "joblib.dump(label_encoder, 'label_encoder.joblib')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "2c896f7c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 1, 2, 3])"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.unique(y_encoded)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "652859d1",
   "metadata": {},
   "source": [
    "Since we are doing classification problem we do not bother for transforming numerical features to noraml distribution "
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "4affe7b6",
   "metadata": {},
   "source": [
    "Create a pipeline for Missing values handling and one hot encoding "
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "8815d1d0",
   "metadata": {},
   "source": [
    "# Experiment 1 : missing value imputation (Numerical) by Median and Handling imbalanced Dataset using Random Oversampling "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "27bdf003",
   "metadata": {},
   "outputs": [],
   "source": [
    "numeric_transformer =Pipeline(steps=[\n",
    "                        ('imputer',SimpleImputer(strategy='median',missing_values=np.nan)),\n",
    "                        ('robust_scaler',RobustScaler())])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "22ac85d6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;imputer&#x27;, SimpleImputer(strategy=&#x27;median&#x27;)),\n",
       "                (&#x27;robust_scaler&#x27;, RobustScaler())])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;imputer&#x27;, SimpleImputer(strategy=&#x27;median&#x27;)),\n",
       "                (&#x27;robust_scaler&#x27;, RobustScaler())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SimpleImputer</label><div class=\"sk-toggleable__content\"><pre>SimpleImputer(strategy=&#x27;median&#x27;)</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RobustScaler</label><div class=\"sk-toggleable__content\"><pre>RobustScaler()</pre></div></div></div></div></div></div></div>"
      ],
      "text/plain": [
       "Pipeline(steps=[('imputer', SimpleImputer(strategy='median')),\n",
       "                ('robust_scaler', RobustScaler())])"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "numeric_transformer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "6a5aaba6",
   "metadata": {},
   "outputs": [],
   "source": [
    "categorical_transformer = Pipeline(steps=[\n",
    "                        ('imputer', SimpleImputer(strategy='most_frequent')),\n",
    "                        ('onehot', OneHotEncoder(drop='first',handle_unknown='ignore'))])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "611a6f5b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-2 {color: black;background-color: white;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;imputer&#x27;, SimpleImputer(strategy=&#x27;most_frequent&#x27;)),\n",
       "                (&#x27;onehot&#x27;,\n",
       "                 OneHotEncoder(drop=&#x27;first&#x27;, handle_unknown=&#x27;ignore&#x27;))])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" ><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;imputer&#x27;, SimpleImputer(strategy=&#x27;most_frequent&#x27;)),\n",
       "                (&#x27;onehot&#x27;,\n",
       "                 OneHotEncoder(drop=&#x27;first&#x27;, handle_unknown=&#x27;ignore&#x27;))])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-5\" type=\"checkbox\" ><label for=\"sk-estimator-id-5\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SimpleImputer</label><div class=\"sk-toggleable__content\"><pre>SimpleImputer(strategy=&#x27;most_frequent&#x27;)</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-6\" type=\"checkbox\" ><label for=\"sk-estimator-id-6\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder(drop=&#x27;first&#x27;, handle_unknown=&#x27;ignore&#x27;)</pre></div></div></div></div></div></div></div>"
      ],
      "text/plain": [
       "Pipeline(steps=[('imputer', SimpleImputer(strategy='most_frequent')),\n",
       "                ('onehot',\n",
       "                 OneHotEncoder(drop='first', handle_unknown='ignore'))])"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "categorical_transformer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "dd4edbda",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['compensated_hypothyroid', 'negative', 'primary_hypothyroid',\n",
       "       'secondary_hypothyroid'], dtype=object)"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.unique(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "8bc23b3a",
   "metadata": {},
   "outputs": [],
   "source": [
    " preprocessor = ColumnTransformer([\n",
    "                            ('num',numeric_transformer,Numerical_Features),\n",
    "                            ('cat', categorical_transformer, Categorical_Features)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "44fbaf09",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-3 {color: black;background-color: white;}#sk-container-id-3 pre{padding: 0;}#sk-container-id-3 div.sk-toggleable {background-color: white;}#sk-container-id-3 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-3 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-3 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-3 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-3 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-3 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-3 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-3 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-3 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-3 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-3 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-3 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-3 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-3 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-3 div.sk-item {position: relative;z-index: 1;}#sk-container-id-3 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-3 div.sk-item::before, #sk-container-id-3 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-3 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-3 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-3 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-3 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-3 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-3 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-3 div.sk-label-container {text-align: center;}#sk-container-id-3 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-3 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-3\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>ColumnTransformer(transformers=[(&#x27;num&#x27;,\n",
       "                                 Pipeline(steps=[(&#x27;imputer&#x27;,\n",
       "                                                  SimpleImputer(strategy=&#x27;median&#x27;)),\n",
       "                                                 (&#x27;robust_scaler&#x27;,\n",
       "                                                  RobustScaler())]),\n",
       "                                 [&#x27;age&#x27;, &#x27;TSH&#x27;, &#x27;T3&#x27;, &#x27;TT4&#x27;, &#x27;T4U&#x27;, &#x27;FTI&#x27;]),\n",
       "                                (&#x27;cat&#x27;,\n",
       "                                 Pipeline(steps=[(&#x27;imputer&#x27;,\n",
       "                                                  SimpleImputer(strategy=&#x27;most_frequent&#x27;)),\n",
       "                                                 (&#x27;onehot&#x27;,\n",
       "                                                  OneHotEncoder(drop=&#x27;first&#x27;,\n",
       "                                                                handle_unknown=&#x27;ignore&#x27;))]),\n",
       "                                 [&#x27;sex&#x27;, &#x27;on_thyroxine&#x27;, &#x27;query_on_thyroxine&#x27;,\n",
       "                                  &#x27;on_antithyroid_medication&#x27;, &#x27;sick&#x27;,\n",
       "                                  &#x27;pregnant&#x27;, &#x27;thyroid_surgery&#x27;,\n",
       "                                  &#x27;I131_treatment&#x27;, &#x27;query_hypothyroid&#x27;,\n",
       "                                  &#x27;query_hyperthyroid&#x27;, &#x27;lithium&#x27;, &#x27;goitre&#x27;,\n",
       "                                  &#x27;tumor&#x27;, &#x27;hypopituitary&#x27;, &#x27;psych&#x27;])])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" ><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">ColumnTransformer</label><div class=\"sk-toggleable__content\"><pre>ColumnTransformer(transformers=[(&#x27;num&#x27;,\n",
       "                                 Pipeline(steps=[(&#x27;imputer&#x27;,\n",
       "                                                  SimpleImputer(strategy=&#x27;median&#x27;)),\n",
       "                                                 (&#x27;robust_scaler&#x27;,\n",
       "                                                  RobustScaler())]),\n",
       "                                 [&#x27;age&#x27;, &#x27;TSH&#x27;, &#x27;T3&#x27;, &#x27;TT4&#x27;, &#x27;T4U&#x27;, &#x27;FTI&#x27;]),\n",
       "                                (&#x27;cat&#x27;,\n",
       "                                 Pipeline(steps=[(&#x27;imputer&#x27;,\n",
       "                                                  SimpleImputer(strategy=&#x27;most_frequent&#x27;)),\n",
       "                                                 (&#x27;onehot&#x27;,\n",
       "                                                  OneHotEncoder(drop=&#x27;first&#x27;,\n",
       "                                                                handle_unknown=&#x27;ignore&#x27;))]),\n",
       "                                 [&#x27;sex&#x27;, &#x27;on_thyroxine&#x27;, &#x27;query_on_thyroxine&#x27;,\n",
       "                                  &#x27;on_antithyroid_medication&#x27;, &#x27;sick&#x27;,\n",
       "                                  &#x27;pregnant&#x27;, &#x27;thyroid_surgery&#x27;,\n",
       "                                  &#x27;I131_treatment&#x27;, &#x27;query_hypothyroid&#x27;,\n",
       "                                  &#x27;query_hyperthyroid&#x27;, &#x27;lithium&#x27;, &#x27;goitre&#x27;,\n",
       "                                  &#x27;tumor&#x27;, &#x27;hypopituitary&#x27;, &#x27;psych&#x27;])])</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" ><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">num</label><div class=\"sk-toggleable__content\"><pre>[&#x27;age&#x27;, &#x27;TSH&#x27;, &#x27;T3&#x27;, &#x27;TT4&#x27;, &#x27;T4U&#x27;, &#x27;FTI&#x27;]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-9\" type=\"checkbox\" ><label for=\"sk-estimator-id-9\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SimpleImputer</label><div class=\"sk-toggleable__content\"><pre>SimpleImputer(strategy=&#x27;median&#x27;)</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-10\" type=\"checkbox\" ><label for=\"sk-estimator-id-10\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RobustScaler</label><div class=\"sk-toggleable__content\"><pre>RobustScaler()</pre></div></div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-11\" type=\"checkbox\" ><label for=\"sk-estimator-id-11\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">cat</label><div class=\"sk-toggleable__content\"><pre>[&#x27;sex&#x27;, &#x27;on_thyroxine&#x27;, &#x27;query_on_thyroxine&#x27;, &#x27;on_antithyroid_medication&#x27;, &#x27;sick&#x27;, &#x27;pregnant&#x27;, &#x27;thyroid_surgery&#x27;, &#x27;I131_treatment&#x27;, &#x27;query_hypothyroid&#x27;, &#x27;query_hyperthyroid&#x27;, &#x27;lithium&#x27;, &#x27;goitre&#x27;, &#x27;tumor&#x27;, &#x27;hypopituitary&#x27;, &#x27;psych&#x27;]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-12\" type=\"checkbox\" ><label for=\"sk-estimator-id-12\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SimpleImputer</label><div class=\"sk-toggleable__content\"><pre>SimpleImputer(strategy=&#x27;most_frequent&#x27;)</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-13\" type=\"checkbox\" ><label for=\"sk-estimator-id-13\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder(drop=&#x27;first&#x27;, handle_unknown=&#x27;ignore&#x27;)</pre></div></div></div></div></div></div></div></div></div></div></div></div>"
      ],
      "text/plain": [
       "ColumnTransformer(transformers=[('num',\n",
       "                                 Pipeline(steps=[('imputer',\n",
       "                                                  SimpleImputer(strategy='median')),\n",
       "                                                 ('robust_scaler',\n",
       "                                                  RobustScaler())]),\n",
       "                                 ['age', 'TSH', 'T3', 'TT4', 'T4U', 'FTI']),\n",
       "                                ('cat',\n",
       "                                 Pipeline(steps=[('imputer',\n",
       "                                                  SimpleImputer(strategy='most_frequent')),\n",
       "                                                 ('onehot',\n",
       "                                                  OneHotEncoder(drop='first',\n",
       "                                                                handle_unknown='ignore'))]),\n",
       "                                 ['sex', 'on_thyroxine', 'query_on_thyroxine',\n",
       "                                  'on_antithyroid_medication', 'sick',\n",
       "                                  'pregnant', 'thyroid_surgery',\n",
       "                                  'I131_treatment', 'query_hypothyroid',\n",
       "                                  'query_hyperthyroid', 'lithium', 'goitre',\n",
       "                                  'tumor', 'hypopituitary', 'psych'])])"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preprocessor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "2ac63d58",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_scaled=preprocessor.fit_transform(X)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "5285b3a2",
   "metadata": {},
   "source": [
    "# Feature Selection (top 10 )using SelectKbest \n",
    "\n",
    "SelectKBest: SelectKBest is a univariate feature selection method that selects the K best features based on their statistical significance. It works by scoring each feature individually and selecting the top K features with the highest scores. This method is simple and efficient for datasets with a small number of features, but it may not work well for datasets with high dimensionality or where features interact with each other."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "adbb2409",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Selected features: ['sex' 'on_thyroxine' 'query_on_thyroxine' 'on_antithyroid_medication'\n",
      " 'sick' 'pregnant' 'thyroid_surgery' 'query_hyperthyroid' 'lithium'\n",
      " 'hypopituitary' 'FTI']\n"
     ]
    }
   ],
   "source": [
    "# Apply feature selection\n",
    "selector = SelectKBest(f_classif, k=11)\n",
    "X_selected = selector.fit_transform(X_scaled,y_encoded)\n",
    "\n",
    "# Get the original feature names for the selected features\n",
    "mask = selector.get_support()\n",
    "selected_features = np.array(X.columns)[mask]\n",
    "print(\"Selected features:\", selected_features)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "398948a9",
   "metadata": {},
   "source": [
    "# Feature Selection using Recursive Method \n",
    "Recursive Feature Elimination (RFE): RFE is a feature selection method that recursively removes features from the dataset, based on their importance or weight. It works by training a model and then ranking the features based on their importance scores. The least important feature is then removed, and the process is repeated until the desired number of features is obtained. RFE can work well for datasets with complex feature interactions, but it can be computationally expensive and sensitive to the choice of the model."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "512ee976",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting estimator with 21 features.\n",
      "Fitting estimator with 20 features.\n",
      "Fitting estimator with 19 features.\n",
      "Fitting estimator with 18 features.\n",
      "Fitting estimator with 17 features.\n",
      "Fitting estimator with 16 features.\n",
      "['age', 'sex', 'on_thyroxine', 'query_on_thyroxine', 'on_antithyroid_medication', 'sick', 'pregnant', 'thyroid_surgery', 'query_hyperthyroid', 'goitre', 'tumor', 'hypopituitary', 'psych', 'TT4', 'FTI']\n"
     ]
    }
   ],
   "source": [
    "from sklearn.feature_selection import RFE\n",
    "# create a linear regression object\n",
    "model =RandomForestClassifier()\n",
    "# create the RFE model and select 5 attributes\n",
    "rfe = RFE(model, n_features_to_select=15, verbose=1)\n",
    "# fit the RFE model to the data\n",
    "rfe.fit(X_scaled, y_encoded)\n",
    "# get the selected feature indices\n",
    "selected_features = rfe.get_support(indices=True)\n",
    "# get the original feature names\n",
    "original_feature_names = list(X.columns[selected_features])\n",
    "# print the original feature names\n",
    "print(original_feature_names)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "ba6a5f58",
   "metadata": {},
   "source": [
    "# Feature Selection with CrossValidation Recursive method\n",
    "\n",
    "Recursive Feature Elimination with Cross-Validation (RFECV): RFECV is an extension of RFE that uses cross-validation to automatically select the optimal number of features. It works by dividing the data into training and validation sets and applying RFE on the training set. The optimal number of features is then determined by selecting the number of features that results in the highest cross-validation score. RFECV can be a good choice when the number of features is high and the optimal number of features is unknown."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "318ba084",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimal number of features : 15\n",
      "Selected features:  ['Feature 1', 'Feature 2', 'Feature 3', 'Feature 4', 'Feature 5', 'Feature 6', 'Feature 8', 'Feature 13', 'Feature 14', 'Feature 15', 'Feature 16', 'Feature 17', 'Feature 18', 'Feature 19', 'Feature 20']\n",
      "Feature rankings:  ['Feature 1 (1)', 'Feature 2 (1)', 'Feature 3 (1)', 'Feature 4 (1)', 'Feature 5 (1)', 'Feature 6 (1)', 'Feature 7 (2)', 'Feature 8 (1)', 'Feature 9 (4)', 'Feature 10 (6)', 'Feature 11 (7)', 'Feature 12 (5)', 'Feature 13 (1)', 'Feature 14 (1)', 'Feature 15 (1)', 'Feature 16 (1)', 'Feature 17 (1)', 'Feature 18 (1)', 'Feature 19 (1)', 'Feature 20 (1)', 'Feature 21 (3)']\n",
      "CV scores:  [[0.95558546 0.96226415 0.9690027  0.97169811 0.95013477]\n",
      " [0.96635262 0.96765499 0.96091644 0.97708895 0.96495957]\n",
      " [0.98519515 0.98787062 0.98247978 0.98787062 0.98652291]\n",
      " [0.98788694 0.99191375 0.9851752  0.98921833 0.98787062]\n",
      " [0.99327052 0.99460916 0.99056604 0.99730458 0.99326146]\n",
      " [0.98923284 0.99326146 0.99056604 0.99460916 0.99326146]\n",
      " [0.99192463 0.99460916 0.99191375 0.99730458 0.99460916]\n",
      " [0.99057873 0.99326146 0.99326146 0.99730458 0.99326146]\n",
      " [0.99192463 0.99460916 0.98921833 0.99730458 0.99460916]\n",
      " [0.99192463 0.99460916 0.99326146 0.99730458 0.99460916]\n",
      " [0.99057873 0.99595687 0.99326146 0.99730458 0.99460916]\n",
      " [0.99057873 0.99460916 0.99326146 0.99730458 0.99595687]\n",
      " [0.99192463 0.99595687 0.99056604 0.99730458 0.99326146]\n",
      " [0.99057873 0.99595687 0.99326146 0.99730458 0.99595687]\n",
      " [0.99192463 0.99595687 0.99326146 0.99865229 0.99460916]\n",
      " [0.99057873 0.99460916 0.99191375 0.99730458 0.99595687]\n",
      " [0.99192463 0.99595687 0.99191375 0.99730458 0.99326146]\n",
      " [0.99192463 0.99460916 0.99326146 0.99730458 0.99460916]\n",
      " [0.99192463 0.99595687 0.98921833 0.99730458 0.99460916]\n",
      " [0.99192463 0.99326146 0.99326146 0.99730458 0.99595687]\n",
      " [0.99192463 0.99595687 0.99326146 0.99730458 0.99460916]]\n",
      "Selected feature names:  ['Feature 1', 'Feature 2', 'Feature 3', 'Feature 4', 'Feature 5', 'Feature 6', 'Feature 8', 'Feature 13', 'Feature 14', 'Feature 15', 'Feature 16', 'Feature 17', 'Feature 18', 'Feature 19', 'Feature 20']\n"
     ]
    }
   ],
   "source": [
    "from sklearn.feature_selection import RFECV\n",
    "from sklearn.model_selection import StratifiedKFold\n",
    "\n",
    "\n",
    "# create a logistic regression model\n",
    "clf = DecisionTreeClassifier()\n",
    "\n",
    "# create a recursive feature elimination object\n",
    "rfecv = RFECV(estimator=clf, step=1, cv=StratifiedKFold(5))\n",
    "\n",
    "# fit the recursive feature elimination object to the data\n",
    "rfecv.fit(X_scaled, y_encoded)\n",
    "\n",
    "# print out the selected features\n",
    "print(\"Optimal number of features : %d\" % rfecv.n_features_)\n",
    "print(\"Selected features: \", [f\"Feature {i+1}\" for i, s in enumerate(rfecv.support_) if s])\n",
    "\n",
    "# print out the ranking of all features\n",
    "print(\"Feature rankings: \", [f\"Feature {i+1} ({rank})\" for i, rank in enumerate(rfecv.ranking_)])\n",
    "\n",
    "# print out the cross-validation scores for each number of features\n",
    "print(\"CV scores: \", rfecv.grid_scores_)\n",
    "\n",
    "# get the original feature names\n",
    "feature_names = [f\"Feature {i+1}\" for i in range(X.shape[1])]\n",
    "\n",
    "# create a list of selected feature names\n",
    "selected_feature_names = [feature_names[i] for i, s in enumerate(rfecv.support_) if s]\n",
    "\n",
    "# print out the selected feature names\n",
    "print(\"Selected feature names: \", selected_feature_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "3e2c4373",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['sex', 'on_thyroxine', 'query_on_thyroxine',\n",
       "       'on_antithyroid_medication', 'sick', 'pregnant', 'I131_treatment',\n",
       "       'tumor', 'hypopituitary', 'psych', 'TSH', 'T3', 'TT4', 'T4U', 'FTI'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X.columns[[1,2,3,4,5,6,8,13,14,15,16,17,18,19,20]]"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "6ca4c27e",
   "metadata": {},
   "source": [
    "#Feature selected using RFECV 'sex', 'on_thyroxine', 'query_on_thyroxine',\n",
    "       'on_antithyroid_medication', 'sick', 'pregnant', 'I131_treatment',\n",
    "       'tumor', 'psych', 'TT4', 'T4U', 'FTI']"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "a71d5309",
   "metadata": {},
   "source": [
    "# Create Functions for model training and evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "9b4b003a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def total_cost(y_true, y_pred):\n",
    "    '''\n",
    "    This function takes y_true, y_pred, and prints Total cost due to misclassification\n",
    "    '''\n",
    "    cm = confusion_matrix(y_true, y_pred)\n",
    "    cost = 0\n",
    "    for i in range(cm.shape[0]):\n",
    "        fp = sum(cm[:,i]) - cm[i,i]\n",
    "        fn = sum(cm[i,:]) - cm[i,i]\n",
    "        cost += 10*fp + 500*fn\n",
    "    return cost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "5a7834ff",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3711"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "1b0a22d3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3711"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_encoded.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "6c06b9c5",
   "metadata": {},
   "outputs": [],
   "source": [
    "def evaluate_clf(true, predicted):\n",
    "            '''\n",
    "            This function takes in true values and predicted values\n",
    "            Returns: Accuracy, F1-Score, Precision, Recall, Roc-auc Score\n",
    "            '''\n",
    "            acc = accuracy_score(true, predicted) # Calculate Accuracy\n",
    "            f1 = f1_score(true, predicted,average='weighted') # Calculate F1-score\n",
    "            precision = precision_score(true, predicted,average='weighted') # Calculate Precision\n",
    "            recall = recall_score(true, predicted,average='weighted')  # Calculate Recall\n",
    "            #roc_auc = roc_auc_score(true, predicted,average='weighted',multi_class='ovr') #Calculate Roc\n",
    "            return acc, f1 , precision, recall"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "79c1e028",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a function which can evaluate models and return a report \n",
    "def evaluate_models(X, y, models):\n",
    "    '''\n",
    "    This function takes in X and y and models dictionary as input\n",
    "    It splits the data into Train Test split\n",
    "    Iterates through the given model dictionary and evaluates the metrics\n",
    "    Returns: Dataframe which contains report of all models metrics with cost\n",
    "    '''\n",
    "    cost_list=[]\n",
    "    models_list = []\n",
    "    accuracy_list = []\n",
    "    train_recall=[]\n",
    "    test_recall=[]\n",
    "    # separate dataset into train and test\n",
    "    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)\n",
    "    \n",
    "    # Create cost of the model as per data description\n",
    "    \n",
    "\n",
    "    for i in range(len(list(models))):\n",
    "        model = list(models.values())[i]\n",
    "        model.fit(X_train, y_train) # Train model\n",
    "\n",
    "        # Make predictions\n",
    "        y_train_pred = model.predict(X_train)\n",
    "        y_test_pred = model.predict(X_test)\n",
    "\n",
    "        # Training set performance\n",
    "        model_train_accuracy, model_train_f1,model_train_precision,\\\n",
    "        model_train_recall=evaluate_clf(y_train ,y_train_pred)\n",
    "        train_recall.append(model_train_recall)\n",
    "        train_cost = total_cost(y_train, y_train_pred)\n",
    "\n",
    "\n",
    "        # Test set performance\n",
    "        model_test_accuracy,model_test_f1,model_test_precision,\\\n",
    "        model_test_recall=evaluate_clf(y_test, y_test_pred)\n",
    "        test_recall.append(model_test_recall)\n",
    "        test_cost = total_cost(y_test, y_test_pred)\n",
    "\n",
    "        print(list(models.keys())[i])\n",
    "        models_list.append(list(models.keys())[i])\n",
    "\n",
    "        print('Model performance for Training set')\n",
    "        print(\"- Accuracy: {:.4f}\".format(model_train_accuracy))\n",
    "        print('- F1 score: {:.4f}'.format(model_train_f1)) \n",
    "        print('- Precision: {:.4f}'.format(model_train_precision))\n",
    "        print('- Recall: {:.4f}'.format(model_train_recall))\n",
    "        #print('- Roc Auc Score: {:.4f}'.format(model_train_rocauc_score))\n",
    "        print(f'- COST: {train_cost}.')\n",
    "\n",
    "        print('----------------------------------')\n",
    "\n",
    "        print('Model performance for Test set')\n",
    "        print('- Accuracy: {:.4f}'.format(model_test_accuracy))\n",
    "        print('- F1 score: {:.4f}'.format(model_test_f1))\n",
    "        print('- Precision: {:.4f}'.format(model_test_precision))\n",
    "        print('- Recall: {:.4f}'.format(model_test_recall))\n",
    "        #print('- Roc Auc Score: {:.4f}'.format(model_test_rocauc_score))\n",
    "        print(f'- COST: {test_cost}.')\n",
    "        cost_list.append(test_cost)\n",
    "        print('='*35)\n",
    "        print('\\n')\n",
    "        \n",
    "    report=pd.DataFrame(list(zip(models_list, cost_list,train_recall,test_recall,)), columns=['Model Name', 'Cost','Train_Recall','Test_Recall']).sort_values(by=[\"Cost\"])\n",
    "        \n",
    "    return report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "c34e12a2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['compensated_hypothyroid', 'negative', 'primary_hypothyroid',\n",
       "       'secondary_hypothyroid'], dtype=object)"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.unique(y)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "068d72ae",
   "metadata": {},
   "source": [
    "#Lets build model and check performance of model using our sleected features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "dd757025",
   "metadata": {},
   "outputs": [],
   "source": [
    "random_over_sample=RandomOverSampler(random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "c1f231c2",
   "metadata": {},
   "outputs": [],
   "source": [
    " X_resampled,y_resampled=random_over_sample.fit_resample(X_scaled,y_encoded)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "de4953c5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(13680, 21)"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_resampled.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "0fe21161",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(13680,)"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_resampled.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "5c8870fe",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:ylabel='count'>"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAD4CAYAAAAdIcpQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAS5UlEQVR4nO3db4xV953f8fcn2OvQTazY8tglQIoVkW2x28XyiFq1tJsmaU0jtZDtZoWljVHrisiyV4m0qmTvg27SCmml5o/W6cYSUbyGKI2F6qRmo3hbgvJH2Tomg0WMgdCgtWtPoDBJGgX3ARXk2wf3h/YKLnMGPPfeGeb9ko7uud97fud+58rmM+ec3z2TqkKSpNm8ZdwNSJIWPsNCktTJsJAkdTIsJEmdDAtJUqfrxt3AsNxyyy21Zs2acbchSYvKgQMHflpVExfXr9mwWLNmDVNTU+NuQ5IWlST/a1Dd01CSpE5DC4skb02yP8kPkxxO8slW/0SSnyQ52JYP9o15LMnxJMeS3NdXvzvJofba40kyrL4lSZca5mmos8D7quqNJNcD30vyXHvts1X1qf6Nk6wDtgB3AO8EvpnkPVV1HngC2AZ8H/gGsBF4DknSSAztyKJ63mhPr2/LbPcW2QQ8XVVnq+oV4DiwIckK4Maqer569ybZBWweVt+SpEsN9ZpFkmVJDgKngb1V9UJ76ZEkLyV5MslNrbYSeL1v+HSrrWzrF9cHvd+2JFNJpmZmZubzR5GkJW2oYVFV56tqPbCK3lHCnfROKb0bWA+cBD7dNh90HaJmqQ96vx1VNVlVkxMTl8z8kiRdpZHMhqqqXwDfBjZW1akWIr8CvgBsaJtNA6v7hq0CTrT6qgF1SdKIDHM21ESSd7T15cAHgB+1axAXfAh4ua3vAbYkuSHJ7cBaYH9VnQTOJLmnzYJ6AHh2WH1Lki41zNlQK4CdSZbRC6XdVfX1JF9Ksp7eqaRXgY8CVNXhJLuBI8A54OE2EwrgIeApYDm9WVDOhJKkEcq1+sePJicna7ZvcN/9b3eNsJuF7cB/fOBN7+O1f//356GTa8O7/t2hNzX+3s/dO0+dLH5/9Qd/9ab38Z3f+u156OTa8Nvf/U7nNkkOVNXkxXW/wS1J6mRYSJI6GRaSpE6GhSSpk2EhSepkWEiSOhkWkqROhoUkqZNhIUnqZFhIkjoZFpKkToaFJKmTYSFJ6mRYSJI6GRaSpE6GhSSpk2EhSepkWEiSOhkWkqROhoUkqZNhIUnqNLSwSPLWJPuT/DDJ4SSfbPWbk+xN8uP2eFPfmMeSHE9yLMl9ffW7kxxqrz2eJMPqW5J0qWEeWZwF3ldVvwmsBzYmuQd4FNhXVWuBfe05SdYBW4A7gI3A55Msa/t6AtgGrG3LxiH2LUm6yNDConreaE+vb0sBm4Cdrb4T2NzWNwFPV9XZqnoFOA5sSLICuLGqnq+qAnb1jZEkjcBQr1kkWZbkIHAa2FtVLwC3VdVJgPZ4a9t8JfB63/DpVlvZ1i+uD3q/bUmmkkzNzMzM688iSUvZUMOiqs5X1XpgFb2jhDtn2XzQdYiapT7o/XZU1WRVTU5MTFxxv5KkwUYyG6qqfgF8m961hlPt1BLt8XTbbBpY3TdsFXCi1VcNqEuSRmSYs6EmkryjrS8HPgD8CNgDbG2bbQWebet7gC1JbkhyO70L2fvbqaozSe5ps6Ae6BsjSRqB64a47xXAzjaj6S3A7qr6epLngd1JHgReAz4MUFWHk+wGjgDngIer6nzb10PAU8By4Lm2SJJGZGhhUVUvAXcNqP8MeP9lxmwHtg+oTwGzXe+QJA2R3+CWJHUyLCRJnQwLSVInw0KS1MmwkCR1MiwkSZ0MC0lSJ8NCktTJsJAkdTIsJEmdDAtJUifDQpLUybCQJHUyLCRJnQwLSVInw0KS1MmwkCR1MiwkSZ0MC0lSJ8NCktTJsJAkdRpaWCRZneRbSY4mOZzkY63+iSQ/SXKwLR/sG/NYkuNJjiW5r69+d5JD7bXHk2RYfUuSLnXdEPd9DvjDqnoxyduBA0n2ttc+W1Wf6t84yTpgC3AH8E7gm0neU1XngSeAbcD3gW8AG4Hnhti7JKnP0I4squpkVb3Y1s8AR4GVswzZBDxdVWer6hXgOLAhyQrgxqp6vqoK2AVsHlbfkqRLjeSaRZI1wF3AC630SJKXkjyZ5KZWWwm83jdsutVWtvWL64PeZ1uSqSRTMzMz8/kjSNKSNvSwSPI24Bng41X1S3qnlN4NrAdOAp++sOmA4TVL/dJi1Y6qmqyqyYmJiTfbuiSpGWpYJLmeXlB8uaq+ClBVp6rqfFX9CvgCsKFtPg2s7hu+CjjR6qsG1CVJIzLM2VABvggcrarP9NVX9G32IeDltr4H2JLkhiS3A2uB/VV1EjiT5J62zweAZ4fVtyTpUsOcDXUv8BHgUJKDrfZHwP1J1tM7lfQq8FGAqjqcZDdwhN5MqofbTCiAh4CngOX0ZkE5E0qSRmhoYVFV32Pw9YZvzDJmO7B9QH0KuHP+upMkXQm/wS1J6mRYSJI6GRaSpE6GhSSpk2EhSepkWEiSOhkWkqROhoUkqZNhIUnqZFhIkjoZFpKkToaFJKmTYSFJ6mRYSJI6GRaSpE6GhSSpk2EhSepkWEiSOhkWkqROhoUkqZNhIUnqZFhIkjoNLSySrE7yrSRHkxxO8rFWvznJ3iQ/bo839Y15LMnxJMeS3NdXvzvJofba40kyrL4lSZca5pHFOeAPq+rvAfcADydZBzwK7KuqtcC+9pz22hbgDmAj8Pkky9q+ngC2AWvbsnGIfUuSLjK0sKiqk1X1Yls/AxwFVgKbgJ1ts53A5ra+CXi6qs5W1SvAcWBDkhXAjVX1fFUVsKtvjCRpBEZyzSLJGuAu4AXgtqo6Cb1AAW5tm60EXu8bNt1qK9v6xfVB77MtyVSSqZmZmXn9GSRpKZtTWCTZN5faZca+DXgG+HhV/XK2TQfUapb6pcWqHVU1WVWTExMTc2lPkjQH1832YpK3An8LuKVdiL7wD/eNwDu7dp7kenpB8eWq+morn0qyoqpOtlNMp1t9GljdN3wVcKLVVw2oS5JGpOvI4qPAAeDvtscLy7PAn802sM1Y+iJwtKo+0/fSHmBrW9/a9nWhviXJDUlup3che387VXUmyT1tnw/0jZEkjcCsRxZV9afAnyb5g6r63BXu+17gI8ChJAdb7Y+APwF2J3kQeA34cHuvw0l2A0fozaR6uKrOt3EPAU8By4Hn2iJJGpFZw+KCqvpckn8ErOkfU1W7ZhnzPQZfbwB4/2XGbAe2D6hPAXfOpVdJ0vybU1gk+RLwbuAgcOG3/QvTWCVJ17g5hQUwCaxr33OQJC0xc/2excvA3x5mI5KkhWuuRxa3AEeS7AfOXihW1b8YSleSpAVlrmHxiWE2IUla2OY6G+o7w25EkrRwzXU21Bn+5hYbvwZcD/zfqrpxWI1JkhaOuR5ZvL3/eZLNwIZhNCRJWniu6q6zVfVfgffNbyuSpIVqrqehfqfv6Vvofe/C71xI0hIx19lQ/7xv/RzwKr0/ViRJWgLmes3iXw27EUnSwjXXP360KsnXkpxOcirJM0lWdY+UJF0L5nqB+8/p/b2Jd9L7k6Z/0WqSpCVgrmExUVV/XlXn2vIU4N8tlaQlYq5h8dMkv59kWVt+H/jZMBuTJC0ccw2Lfw38HvC/gZPA7wJe9JakJWKuU2f/A7C1qv4PQJKbgU/RCxFJ0jVurkcW/+BCUABU1c+Bu4bTkiRpoZlrWLwlyU0XnrQji7kelUiSFrm5/oP/aeB/JPkv9G7z8XvA9qF1JUlaUOZ0ZFFVu4B/CZwCZoDfqaovzTYmyZPtS3wv99U+keQnSQ625YN9rz2W5HiSY0nu66vfneRQe+3xJLnSH1KS9ObM+VRSVR0BjlzBvp8C/hOw66L6Z6vqU/2FJOuALcAd9L74980k76mq88ATwDbg+8A3gI3Ac1fQhyTpTbqqW5TPRVV9F/j5HDffBDxdVWer6hXgOLAhyQrgxqp6vqqKXvBsHkrDkqTLGlpYzOKRJC+101QXLpqvBF7v22a61Va29YvrAyXZlmQqydTMzMx89y1JS9aow+IJ4N3Aenpf7vt0qw+6DlGz1Aeqqh1VNVlVkxMT3o1EkubLSMOiqk5V1fmq+hXwBf7mT7NOA6v7Nl0FnGj1VQPqkqQRGmlYtGsQF3wIuDBTag+wJckNSW4H1gL7q+okcCbJPW0W1APAs6PsWZI0xC/WJfkK8F7gliTTwB8D702ynt6ppFeBjwJU1eEku+nNtjoHPNxmQgE8RG9m1XJ6s6CcCSVJIza0sKiq+weUvzjL9tsZ8EW/qpoC7pzH1iRJV2gcs6EkSYuMYSFJ6mRYSJI6GRaSpE6GhSSpk2EhSepkWEiSOhkWkqROhoUkqZNhIUnqZFhIkjoZFpKkToaFJKmTYSFJ6mRYSJI6GRaSpE6GhSSpk2EhSepkWEiSOhkWkqROhoUkqdPQwiLJk0lOJ3m5r3Zzkr1Jftweb+p77bEkx5McS3JfX/3uJIfaa48nybB6liQNNswji6eAjRfVHgX2VdVaYF97TpJ1wBbgjjbm80mWtTFPANuAtW25eJ+SpCEbWlhU1XeBn19U3gTsbOs7gc199aer6mxVvQIcBzYkWQHcWFXPV1UBu/rGSJJGZNTXLG6rqpMA7fHWVl8JvN633XSrrWzrF9cHSrItyVSSqZmZmXltXJKWsoVygXvQdYiapT5QVe2oqsmqmpyYmJi35iRpqRt1WJxqp5Zoj6dbfRpY3bfdKuBEq68aUJckjdCow2IPsLWtbwWe7atvSXJDktvpXcje305VnUlyT5sF9UDfGEnSiFw3rB0n+QrwXuCWJNPAHwN/AuxO8iDwGvBhgKo6nGQ3cAQ4BzxcVefbrh6iN7NqOfBcWyRJIzS0sKiq+y/z0vsvs/12YPuA+hRw5zy2Jkm6QgvlArckaQEzLCRJnQwLSVInw0KS1MmwkCR1MiwkSZ0MC0lSJ8NCktTJsJAkdTIsJEmdDAtJUifDQpLUybCQJHUyLCRJnQwLSVInw0KS1MmwkCR1MiwkSZ0MC0lSJ8NCktTJsJAkdRpLWCR5NcmhJAeTTLXazUn2Jvlxe7ypb/vHkhxPcizJfePoWZKWsnEeWfzjqlpfVZPt+aPAvqpaC+xrz0myDtgC3AFsBD6fZNk4GpakpWohnYbaBOxs6zuBzX31p6vqbFW9AhwHNoy+PUlausYVFgX89yQHkmxrtduq6iRAe7y11VcCr/eNnW41SdKIXDem9723qk4kuRXYm+RHs2ybAbUauGEveLYBvOtd73rzXUqSgDEdWVTVifZ4GvgavdNKp5KsAGiPp9vm08DqvuGrgBOX2e+OqpqsqsmJiYlhtS9JS87IwyLJryd5+4V14J8CLwN7gK1ts63As219D7AlyQ1JbgfWAvtH27UkLW3jOA11G/C1JBfe/z9X1V8m+QGwO8mDwGvAhwGq6nCS3cAR4BzwcFWdH0PfkrRkjTwsquqvgd8cUP8Z8P7LjNkObB9ya5Kky1hIU2clSQuUYSFJ6mRYSJI6GRaSpE6GhSSpk2EhSepkWEiSOhkWkqROhoUkqZNhIUnqZFhIkjoZFpKkToaFJKmTYSFJ6mRYSJI6GRaSpE6GhSSpk2EhSepkWEiSOhkWkqROhoUkqZNhIUnqtGjCIsnGJMeSHE/y6Lj7kaSlZFGERZJlwJ8B/wxYB9yfZN14u5KkpWNRhAWwATheVX9dVf8PeBrYNOaeJGnJSFWNu4dOSX4X2FhV/6Y9/wjwD6vqkYu22wZsa09/Azg20kavzi3AT8fdxDXCz3J++XnOr8Xyef6dqpq4uHjdODq5ChlQuyTlqmoHsGP47cyfJFNVNTnuPq4Ffpbzy89zfi32z3OxnIaaBlb3PV8FnBhTL5K05CyWsPgBsDbJ7Ul+DdgC7BlzT5K0ZCyK01BVdS7JI8B/A5YBT1bV4TG3NV8W1WmzBc7Pcn75ec6vRf15LooL3JKk8Vosp6EkSWNkWEiSOhkWY+LtS+ZPkieTnE7y8rh7uRYkWZ3kW0mOJjmc5GPj7mmxSvLWJPuT/LB9lp8cd09Xy2sWY9BuX/I/gX9Cb1rwD4D7q+rIWBtbpJL8FvAGsKuq7hx3P4tdkhXAiqp6McnbgQPAZv/7vHJJAvx6Vb2R5Hrge8DHqur7Y27tinlkMR7evmQeVdV3gZ+Pu49rRVWdrKoX2/oZ4CiwcrxdLU7V80Z7en1bFuVv6IbFeKwEXu97Po3/M2oBSrIGuAt4YcytLFpJliU5CJwG9lbVovwsDYvxmNPtS6RxSvI24Bng41X1y3H3s1hV1fmqWk/vzhMbkizKU6WGxXh4+xItaO38+jPAl6vqq+Pu51pQVb8Avg1sHG8nV8ewGA9vX6IFq12U/SJwtKo+M+5+FrMkE0ne0daXAx8AfjTWpq6SYTEGVXUOuHD7kqPA7mvo9iUjl+QrwPPAbySZTvLguHta5O4FPgK8L8nBtnxw3E0tUiuAbyV5id4viXur6utj7umqOHVWktTJIwtJUifDQpLUybCQJHUyLCRJnQwLSVInw0KS1MmwkCR1+v+vTWH7Y/1rgQAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.countplot(y_resampled)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "1dd304bb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dictionary which contains models for experiment\n",
    "models = {\n",
    "    \"Random Forest\": RandomForestClassifier(),\n",
    "    \"Decision Tree\": DecisionTreeClassifier(),\n",
    "    \"Gradient Boosting\": GradientBoostingClassifier(),\n",
    "     \"K-Neighbors Classifier\": KNeighborsClassifier(),\n",
    "    \"XGBClassifier\": XGBClassifier(), \n",
    "     \"CatBoosting Classifier\": CatBoostClassifier(verbose=False),\n",
    "    \"AdaBoost Classifier\": AdaBoostClassifier()\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "5e3a6b13",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Random Forest\n",
      "Model performance for Training set\n",
      "- Accuracy: 1.0000\n",
      "- F1 score: 1.0000\n",
      "- Precision: 1.0000\n",
      "- Recall: 1.0000\n",
      "- COST: 0.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9993\n",
      "- F1 score: 0.9993\n",
      "- Precision: 0.9993\n",
      "- Recall: 0.9993\n",
      "- COST: 1020.\n",
      "===================================\n",
      "\n",
      "\n",
      "Decision Tree\n",
      "Model performance for Training set\n",
      "- Accuracy: 1.0000\n",
      "- F1 score: 1.0000\n",
      "- Precision: 1.0000\n",
      "- Recall: 1.0000\n",
      "- COST: 0.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9996\n",
      "- F1 score: 0.9996\n",
      "- Precision: 0.9996\n",
      "- Recall: 0.9996\n",
      "- COST: 510.\n",
      "===================================\n",
      "\n",
      "\n",
      "Gradient Boosting\n",
      "Model performance for Training set\n",
      "- Accuracy: 1.0000\n",
      "- F1 score: 1.0000\n",
      "- Precision: 1.0000\n",
      "- Recall: 1.0000\n",
      "- COST: 0.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9993\n",
      "- F1 score: 0.9993\n",
      "- Precision: 0.9993\n",
      "- Recall: 0.9993\n",
      "- COST: 1020.\n",
      "===================================\n",
      "\n",
      "\n",
      "K-Neighbors Classifier\n",
      "Model performance for Training set\n",
      "- Accuracy: 0.9938\n",
      "- F1 score: 0.9938\n",
      "- Precision: 0.9939\n",
      "- Recall: 0.9938\n",
      "- COST: 34680.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9934\n",
      "- F1 score: 0.9934\n",
      "- Precision: 0.9936\n",
      "- Recall: 0.9934\n",
      "- COST: 9180.\n",
      "===================================\n",
      "\n",
      "\n",
      "XGBClassifier\n",
      "Model performance for Training set\n",
      "- Accuracy: 1.0000\n",
      "- F1 score: 1.0000\n",
      "- Precision: 1.0000\n",
      "- Recall: 1.0000\n",
      "- COST: 0.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9993\n",
      "- F1 score: 0.9993\n",
      "- Precision: 0.9993\n",
      "- Recall: 0.9993\n",
      "- COST: 1020.\n",
      "===================================\n",
      "\n",
      "\n",
      "CatBoosting Classifier\n",
      "Model performance for Training set\n",
      "- Accuracy: 1.0000\n",
      "- F1 score: 1.0000\n",
      "- Precision: 1.0000\n",
      "- Recall: 1.0000\n",
      "- COST: 0.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9993\n",
      "- F1 score: 0.9993\n",
      "- Precision: 0.9993\n",
      "- Recall: 0.9993\n",
      "- COST: 1020.\n",
      "===================================\n",
      "\n",
      "\n",
      "AdaBoost Classifier\n",
      "Model performance for Training set\n",
      "- Accuracy: 0.6927\n",
      "- F1 score: 0.6181\n",
      "- Precision: 0.5976\n",
      "- Recall: 0.6927\n",
      "- COST: 1715130.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.6970\n",
      "- F1 score: 0.6220\n",
      "- Precision: 0.5999\n",
      "- Recall: 0.6970\n",
      "- COST: 422790.\n",
      "===================================\n",
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model Name</th>\n",
       "      <th>Cost</th>\n",
       "      <th>Train_Recall</th>\n",
       "      <th>Test_Recall</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Decision Tree</td>\n",
       "      <td>510</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.999635</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Random Forest</td>\n",
       "      <td>1020</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.999269</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Gradient Boosting</td>\n",
       "      <td>1020</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.999269</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>XGBClassifier</td>\n",
       "      <td>1020</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.999269</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>CatBoosting Classifier</td>\n",
       "      <td>1020</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.999269</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>K-Neighbors Classifier</td>\n",
       "      <td>9180</td>\n",
       "      <td>0.993787</td>\n",
       "      <td>0.993421</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>AdaBoost Classifier</td>\n",
       "      <td>422790</td>\n",
       "      <td>0.692708</td>\n",
       "      <td>0.697003</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               Model Name    Cost  Train_Recall  Test_Recall\n",
       "1           Decision Tree     510      1.000000     0.999635\n",
       "0           Random Forest    1020      1.000000     0.999269\n",
       "2       Gradient Boosting    1020      1.000000     0.999269\n",
       "4           XGBClassifier    1020      1.000000     0.999269\n",
       "5  CatBoosting Classifier    1020      1.000000     0.999269\n",
       "3  K-Neighbors Classifier    9180      0.993787     0.993421\n",
       "6     AdaBoost Classifier  422790      0.692708     0.697003"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "report1 = evaluate_models(X_resampled, y_resampled, models)\n",
    "report1"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "9b66f1b3",
   "metadata": {},
   "source": [
    "# Lets do CrossValidation "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "e2998911",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import make_scorer, recall_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "04dae4e8",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import KFold,cross_val_score,StratifiedKFold"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "14ff0ee6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "StratifiedKFold(n_splits=5, random_state=42, shuffle=True)"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)\n",
    "cv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "de597616",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the dataset into training and testing data\n",
    "X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.3, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "59c747a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use cross-validation to evaluate model performance\n",
    "precision_scores = cross_val_score(RandomForestClassifier(), X_train, y_train, cv=cv, scoring=\"precision_macro\")\n",
    "recall_scores = cross_val_score(RandomForestClassifier(), X_train, y_train, cv=cv, scoring=\"recall_macro\")\n",
    "f1_scores = cross_val_score(RandomForestClassifier(), X_train, y_train, cv=cv, scoring=\"f1_macro\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "64bf0a84",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Precision_random-forest: 0.9987477990446478\n",
      "Recall_random_forest: 0.998952659140549\n",
      "F1-score_random_forest: 0.9986362975820657\n"
     ]
    }
   ],
   "source": [
    "# Output evaluation metrics for a random forest model\n",
    "print(\"Precision_random-forest:\", precision_scores.mean())\n",
    "print(\"Recall_random_forest:\", recall_scores.mean())\n",
    "print(\"F1-score_random_forest:\", f1_scores.mean())"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "8035fc85",
   "metadata": {},
   "source": [
    "#Crossvalidation for test data  of Random Forest "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "5d504cdc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Precision_random_forest: 0.999029889015916\n",
      "Recall_random_forest: 0.9992732558139534\n",
      "F1-score_forset: 0.9992735433066741\n"
     ]
    }
   ],
   "source": [
    "# Calculate random forest model evaluation metrics\n",
    "from sklearn.metrics import precision_score, recall_score, f1_score\n",
    "\n",
    "# Precision score\n",
    "precision_rf = precision_score(y_test, RandomForestClassifier().fit(X_train,y_train).predict(X_test), average=\"macro\")\n",
    "print(\"Precision_random_forest:\", precision_rf)\n",
    "\n",
    "# Recall score\n",
    "recall_rf = recall_score(y_test, RandomForestClassifier().fit(X_train,y_train).predict(X_test), average=\"macro\")\n",
    "print(\"Recall_random_forest:\", recall_rf)\n",
    "\n",
    "# F1-score\n",
    "f1_rf = f1_score(y_test, RandomForestClassifier().fit(X_train,y_train).predict(X_test), average=\"macro\")\n",
    "print(\"F1-score_forset:\", f1_rf)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "9ee0d6af",
   "metadata": {},
   "source": [
    "# Similrly for Decision Tree "
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "b4018397",
   "metadata": {},
   "source": [
    "Cross validation check for traindataset "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "d330bee1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Precision_random-forest: 0.9993756279631161\n",
      "Recall_random_forest: 0.9991623027464189\n",
      "F1-score_random_forest: 0.9993724746218644\n",
      "Precision_random_forest: 0.9995144734063415\n",
      "Recall_random_forest: 0.9992732558139534\n",
      "F1-score_forest: 0.9992702468178922\n"
     ]
    }
   ],
   "source": [
    "# Use cross-validation to evaluate model performance\n",
    "precision_scores = cross_val_score(DecisionTreeClassifier(), X_train, y_train, cv=cv, scoring=\"precision_macro\")\n",
    "recall_scores = cross_val_score(DecisionTreeClassifier(), X_train, y_train, cv=cv, scoring=\"recall_macro\")\n",
    "f1_scores = cross_val_score(DecisionTreeClassifier(), X_train, y_train, cv=cv, scoring=\"f1_macro\")\n",
    "\n",
    "# Output evaluation metrics for a random forest model\n",
    "print(\"Precision_random-forest:\", precision_scores.mean())\n",
    "print(\"Recall_random_forest:\", recall_scores.mean())\n",
    "print(\"F1-score_random_forest:\", f1_scores.mean())\n",
    "\n",
    "# Calculate random forest model evaluation metrics\n",
    "from sklearn.metrics import precision_score, recall_score, f1_score\n",
    "\n",
    "def random_forest_metrics(X_train, X_test, y_train, y_test):\n",
    "    \"\"\"\n",
    "    Calculate precision, recall, and F1 score for a random forest model.\n",
    "\n",
    "    Parameters:\n",
    "    X_train (numpy array): training input data\n",
    "    X_test (numpy array): testing input data\n",
    "    y_train (numpy array): training output data\n",
    "    y_test (numpy array): testing output data\n",
    "\n",
    "    Returns:\n",
    "    None\n",
    "    \"\"\"\n",
    "    # Precision score\n",
    "    precision_rf = precision_score(y_test, DecisionTreeClassifier().fit(X_train,y_train).predict(X_test), average=\"macro\")\n",
    "    print(\"Precision_random_forest:\", precision_rf)\n",
    "\n",
    "    # Recall score\n",
    "    recall_rf = recall_score(y_test, DecisionTreeClassifier().fit(X_train,y_train).predict(X_test), average=\"macro\")\n",
    "    print(\"Recall_random_forest:\", recall_rf)\n",
    "\n",
    "    # F1-score\n",
    "    f1_rf = f1_score(y_test, DecisionTreeClassifier().fit(X_train,y_train).predict(X_test), average=\"macro\")\n",
    "    print(\"F1-score_forest:\", f1_rf)\n",
    "\n",
    "random_forest_metrics(X_train, X_test, y_train, y_test)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "8b6cb86c",
   "metadata": {},
   "source": [
    "#After cross validation  Decison Tree is giving 99.9 % for Train Dataset and 99.7% for Test Dataset "
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "9d88f3d4",
   "metadata": {},
   "source": [
    "# Experiment No 2 :Handling imbalanced dataset using SMOTE "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "d1e680dd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.41935484, -0.05221932,  0.83333333, ...,  0.        ,\n",
       "         0.        ,  0.        ],\n",
       "       [-1.        ,  1.40992167,  0.        , ...,  0.        ,\n",
       "         0.        ,  0.        ],\n",
       "       [-0.25806452, -0.21932115,  0.        , ...,  0.        ,\n",
       "         0.        ,  0.        ],\n",
       "       ...,\n",
       "       [ 0.64516129,  1.93211488, -0.33333333, ...,  0.        ,\n",
       "         0.        ,  0.        ],\n",
       "       [ 0.58064516, -0.36553525,  0.        , ...,  0.        ,\n",
       "         0.        ,  0.        ],\n",
       "       [ 0.32258065, -0.20887728,  0.33333333, ...,  0.        ,\n",
       "         0.        ,  0.        ]])"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_scaled"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "id": "1cab3789",
   "metadata": {},
   "outputs": [],
   "source": [
    "from imblearn.over_sampling import SMOTE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "0cc858cd",
   "metadata": {},
   "outputs": [],
   "source": [
    "smt=SMOTE(random_state=42,k_neighbors=1)\n",
    "X_res_smote,y_res_smote=smt.fit_resample(X_scaled,y_encoded)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "ee0225be",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Random Forest\n",
      "Model performance for Training set\n",
      "- Accuracy: 1.0000\n",
      "- F1 score: 1.0000\n",
      "- Precision: 1.0000\n",
      "- Recall: 1.0000\n",
      "- COST: 0.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9989\n",
      "- F1 score: 0.9989\n",
      "- Precision: 0.9989\n",
      "- Recall: 0.9989\n",
      "- COST: 1530.\n",
      "===================================\n",
      "\n",
      "\n",
      "Decision Tree\n",
      "Model performance for Training set\n",
      "- Accuracy: 1.0000\n",
      "- F1 score: 1.0000\n",
      "- Precision: 1.0000\n",
      "- Recall: 1.0000\n",
      "- COST: 0.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9971\n",
      "- F1 score: 0.9971\n",
      "- Precision: 0.9971\n",
      "- Recall: 0.9971\n",
      "- COST: 4080.\n",
      "===================================\n",
      "\n",
      "\n",
      "Gradient Boosting\n",
      "Model performance for Training set\n",
      "- Accuracy: 0.9998\n",
      "- F1 score: 0.9998\n",
      "- Precision: 0.9998\n",
      "- Recall: 0.9998\n",
      "- COST: 1020.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9967\n",
      "- F1 score: 0.9967\n",
      "- Precision: 0.9967\n",
      "- Recall: 0.9967\n",
      "- COST: 4590.\n",
      "===================================\n",
      "\n",
      "\n",
      "K-Neighbors Classifier\n",
      "Model performance for Training set\n",
      "- Accuracy: 0.9926\n",
      "- F1 score: 0.9926\n",
      "- Precision: 0.9927\n",
      "- Recall: 0.9926\n",
      "- COST: 41310.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9923\n",
      "- F1 score: 0.9923\n",
      "- Precision: 0.9924\n",
      "- Recall: 0.9923\n",
      "- COST: 10710.\n",
      "===================================\n",
      "\n",
      "\n",
      "XGBClassifier\n",
      "Model performance for Training set\n",
      "- Accuracy: 1.0000\n",
      "- F1 score: 1.0000\n",
      "- Precision: 1.0000\n",
      "- Recall: 1.0000\n",
      "- COST: 0.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9985\n",
      "- F1 score: 0.9985\n",
      "- Precision: 0.9985\n",
      "- Recall: 0.9985\n",
      "- COST: 2040.\n",
      "===================================\n",
      "\n",
      "\n",
      "CatBoosting Classifier\n",
      "Model performance for Training set\n",
      "- Accuracy: 1.0000\n",
      "- F1 score: 1.0000\n",
      "- Precision: 1.0000\n",
      "- Recall: 1.0000\n",
      "- COST: 0.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9985\n",
      "- F1 score: 0.9985\n",
      "- Precision: 0.9985\n",
      "- Recall: 0.9985\n",
      "- COST: 2040.\n",
      "===================================\n",
      "\n",
      "\n",
      "AdaBoost Classifier\n",
      "Model performance for Training set\n",
      "- Accuracy: 0.7527\n",
      "- F1 score: 0.7037\n",
      "- Precision: 0.8465\n",
      "- Recall: 0.7527\n",
      "- COST: 1380060.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.7507\n",
      "- F1 score: 0.6990\n",
      "- Precision: 0.8389\n",
      "- Recall: 0.7507\n",
      "- COST: 347820.\n",
      "===================================\n",
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model Name</th>\n",
       "      <th>Cost</th>\n",
       "      <th>Train_Recall</th>\n",
       "      <th>Test_Recall</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Random Forest</td>\n",
       "      <td>1530</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.998904</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>XGBClassifier</td>\n",
       "      <td>2040</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.998538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>CatBoosting Classifier</td>\n",
       "      <td>2040</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.998538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Decision Tree</td>\n",
       "      <td>4080</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.997076</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Gradient Boosting</td>\n",
       "      <td>4590</td>\n",
       "      <td>0.999817</td>\n",
       "      <td>0.996711</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>K-Neighbors Classifier</td>\n",
       "      <td>10710</td>\n",
       "      <td>0.992599</td>\n",
       "      <td>0.992325</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>AdaBoost Classifier</td>\n",
       "      <td>347820</td>\n",
       "      <td>0.752741</td>\n",
       "      <td>0.750731</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               Model Name    Cost  Train_Recall  Test_Recall\n",
       "0           Random Forest    1530      1.000000     0.998904\n",
       "4           XGBClassifier    2040      1.000000     0.998538\n",
       "5  CatBoosting Classifier    2040      1.000000     0.998538\n",
       "1           Decision Tree    4080      1.000000     0.997076\n",
       "2       Gradient Boosting    4590      0.999817     0.996711\n",
       "3  K-Neighbors Classifier   10710      0.992599     0.992325\n",
       "6     AdaBoost Classifier  347820      0.752741     0.750731"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "report2=evaluate_models(X_res_smote,y_res_smote,models)\n",
    "report2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "f10e336b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "KFold(n_splits=10, random_state=None, shuffle=True)"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import KFold,cross_val_score\n",
    "k_f = KFold(n_splits=10,shuffle=True)\n",
    "k_f"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "994e5ef5",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train,X_test,y_train,y_test=train_test_split(X_res_smote,y_res_smote,test_size=0.3,random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "1d44867d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mean of cross validation score for Random Forest model==> 0.9974931446783725\n",
      "Mean of cross validation score for Decision Tree  model==> 0.9971801013518673\n"
     ]
    }
   ],
   "source": [
    "print(\"Mean of cross validation score for Random Forest model==>\",cross_val_score(RandomForestClassifier(),X_train,y_train,cv=k_f).mean())\n",
    "print(\"Mean of cross validation score for Decision Tree  model==>\",cross_val_score(DecisionTreeClassifier(),X_train,y_train,cv=k_f).mean())"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "db61be54",
   "metadata": {},
   "source": [
    "# Experiment No3 :PCA "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "12b58fd2",
   "metadata": {},
   "outputs": [],
   "source": [
    "X=df.drop('Class',axis=1)\n",
    "y=df['Class']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "ee7c63c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "y=LabelEncoder().fit_transform(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "e4f25489",
   "metadata": {},
   "outputs": [],
   "source": [
    "numeric_transformer =Pipeline(steps=[\n",
    "                        ('imputer',SimpleImputer(strategy='constant',missing_values=np.nan,fill_value=0)),\n",
    "                        ('robust_scaler',RobustScaler())])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "id": "383c3a30",
   "metadata": {},
   "outputs": [],
   "source": [
    "categorical_transformer = Pipeline(steps=[\n",
    "                        ('imputer', SimpleImputer(strategy='most_frequent')),\n",
    "                        ('onehot', OneHotEncoder(drop='first',handle_unknown='ignore'))])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "id": "b69b51f6",
   "metadata": {},
   "outputs": [],
   "source": [
    " pca_preprocessor = ColumnTransformer([\n",
    "                            ('num',numeric_transformer,Numerical_Features),\n",
    "                            ('cat', categorical_transformer, Categorical_Features)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "id": "da9ec057",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_pca=pca_preprocessor.fit_transform(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "id": "8c40d985",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Applying PCA\n",
    "from sklearn.decomposition import PCA\n",
    "var_ratio={}\n",
    "for n in range(2,21):\n",
    "    pc=PCA(n_components=n)\n",
    "    df_pca=pc.fit(X_pca)\n",
    "    var_ratio[n]=sum(df_pca.explained_variance_ratio_)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "b430403d",
   "metadata": {},
   "source": [
    "Variace plot "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "id": "acc7a9b4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:>"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD4CAYAAADlwTGnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAjw0lEQVR4nO3de5RU5Znv8e/T9wsNNDSXhubSInIRBaHFa0zWwcyok4iXONHcXI7GmIlJzDozJ4yzTnTOysmYHJ2szBxPGHPijCaOZEZ0JJFEHdeccYIodxAQtLl3A01fgO6GvlXVc/6oDZZtN11Ad++qrt9nrVp77/d9d/WzN8X71H73rr3N3RERkcyTFXYAIiISDiUAEZEMpQQgIpKhlABERDKUEoCISIbKCTuAs1FWVuZTp04NOwwRkbSyfv36Bncf0708rRLA1KlTWbduXdhhiIikFTPb11O5hoBERDKUEoCISIZSAhARyVBKACIiGUoJQEQkQ/WZAMzsaTM7YmZbe6k3M/tbM6s2sy1mNj+h7gYz2xnULUkoH2Vmr5vZB8G0tH82R0REkpXMEcA/Ajecof5GYHrwuh/4KYCZZQNPBvWzgbvMbHawzhLgDXefDrwRLIuIyCDq83cA7v6mmU09Q5PFwLMev6/022Y20szKgalAtbvvBjCzZUHb7cH0U8H6zwD/D/juuW2CiEhqiMWcjkiMtq4o7cGrIxKjKxqjK+pEojEiMacrGiMSdSKx+HIkGpTF4m26utVFojFum1/B1LLifo23P34INhE4kLBcE5T1VH5FMD/O3Q8BuPshMxvb25ub2f3EjyyYPHlyP4QrIpmoMxKjrTPKya4IJzujtHVGaeuKBvPxspOd8U771HxbZ4T2roQOPRKLd+pd0Y+WB3WdkdiAxT9/SmlKJgDroczPUH5W3P0p4CmAqqoqPb1GZIhzj3+LPtER75RPdEY40RHhREeUk50RWoPpiY5ovPxUfWeUk0G7E0GHfqIjcrqjj8TOrvvIy8miMDebwtxsCnKzKMjNJj83m4KcLEqL8yjI+bA8XpcVlMXLC0+V52SRm51FTrbFp1lGTnYWudlGdtaHZafa5GTF63JOtc2KtzPrqUs9P/2RAGqASQnLFcBBIK+XcoA6MysPvv2XA0f6IQ4RCVEkGuNER5SWji5aOyK0tkdo6Yh3zq3tkXhZ9/mgvqX9w/nWjghd0eQ66yyD4rwcivNzKMrPDuazGT+8gMK8+HJhXjZFwaswL+f0fEFuQnluTkKbeKefkz30L5LsjwSwAngwGOO/AjgedOz1wHQzqwRqgTuBLySsczfwWDB9uR/iEJFz4O60dUVpbovQ0t5Fc3t8eroTDzrylvau0533qbLW9q7TnffJzmhSf68oL5th+TkMK8iJT/NzmDyq6CNlxfkfTovzsinKz2FYfjZFeTkU58U7+2H5OeTnZA3IN+NM0WcCMLPniZ+wLTOzGuARIBfA3ZcCK4GbgGrgJHBPUBcxsweBV4Fs4Gl33xa87WPAP5vZvcB+4I5+3CaRjBOJxmg80UlDawct7RGa2z7syJvbIjS3d3Wbj0+b2+LzfQ2PmMGwvHgHXRJ00iMLc6koLaQk/1RZ7kfqT3XoJac69IJ4552dpQ47VVg6PRS+qqrKdTdQySSRaIyG1k6OtLRT19xxelrfbbmxtYMz9eHFedmUFOQyvDCH4QW5DC/MpaTg1HxOvC6YH5Z/ajnhG3leDlnquNOWma1396ru5Wl1O2iRocTdOXqyiz0NJ9jTcIL9TSc50tzOkZYO6oJpQ2sH3b+jmcHo4nzGDc9nbEk+cyaMYGxJPmOHF1A2LO90Bz+8IN7JlxTkZMR4tpw9JQCRAXaiI8KehhPsbTzBnvp4Z7876PSPt3WdbmcGZcPinfq44QVcMnEEY4cXnF4+NS0blqcOXfqFEoBIP+iMxNjfdDLe0Z/u4FvZ03CCuuaOj7SdMKKAyjHFfHZuOZVlw6gsK6KybBgVpYXkqmOXQaQEIHIWYjGn9lgbOw63sONQMzvq4tO9jSeJJgzCjyrOo7KsmE9MH0NlWfHp19TRxRTmZYe4BSIfUgIQ6cXxti52Hm5hx+Hm0x3++3WttHZETreZPKqIGeNLuHFOOdPGxjv4yrJiRhblhRi5SHKUACTjdUZi7G5oZefhFt471MLOoMM/dLz9dJsRhbnMGF/C7fMnMmP8cGaWl3DRuBKG5eu/kKQvfXol49Q1t7N+39HTr20Hj5/+5WlutjFtzDCuqBx1uqOfOb6E8cML9IMjGXKUAGRI64rG2HGohfX7mli//xgb9h2l9lgbAPk5WVxaMYJ7rqnk4gnDmTG+hAvKhpGXoxOxkhmUAGRIOXqikw3749/sN+w/yuYDx2nrit+iYPzwAhZMKeVPrq1kwZRSZpcPV2cvGU0JQNJWLObsqm/9cDhn/1F2158AICfLmD1hOJ+/fBILppSyYEopE0YWhhyxSGpRApC00RWNsbX2OGv3NrFmz1HW7Wvi2Mn4D6lKi3JZMKWUzy2oYMHkUi6tGKnLLUX6oAQgKautM8rG/UdZs7eJtXub2LDv2OnhnKmji/j0rHFcXjmKqimlVJYV6yStyFlSApCUcexkJ+v2HmXt3ibe2dPE1trjRGKOGcwaHx/OuXzqKC6fWsrY4QVhhyuS9pQAJDR1ze28s6eJtXuaWLOniZ11LQDkZcevzvnqdRewcOoo5k8pZURhbsjRigw9SgAyqDoiUV7fXseyNQf4fXUDEL9V8fwppXx2bjmXTx3F3EkjKcjV+L3IQFMCkEFRfaSFZWsO8OLGWppOdDJxZCEPXT+dRTPHMau8RHe3FAmBEoAMmLbOKK+8e4hla/azbt9RcrKMT88ex50LJ3PthWV6MpRIyJQApN9trT3OsrX7eXnjQVo6IlxQVsxf3DiT2xdUUDYsP+zwRCSgBCD9orm9ixWbDrJs7X621jaTn5PFTZeUc+flk1hYOUqXaIqkICUAOWfuzob9R3l+zQFe2XKItq4oM8eX8Fc3X8wt8yYyokhX7oikMiUAOWvRmPPcO/v4xep9fHCkleK8bG65bAJ3Xj6ZSytG6Nu+SJpQApCz0nSik28v28h/ftDA3Ekj+eHtl/CZSydQrPvii6Qd/a+VpG3cf5RvPLeBhhOd/PVtl3Dn5ZP0bV8kjSkBSJ/cnWdX7+P7r2xn3PAClj9wNZdUjAg7LBE5T0oAckYnOiIsefFdfr35IItmjuVv/nieTu6KDBFKANKr6iMtPPDLDeyub+XP/3AGX//kNLL04y2RIUMJQHq0YvNBlizfQlFeNr+89wquvrAs7JBEpJ8pAchHdEZi/M9XtvPM6n1UTSnlf39hPuNH6NbLIkOREoCcdvBYG3/63AY2HTjGfddW8t0bZ5Krm7SJDFlKAALAm+/X8+1lG+mKOv/ni/O56ZLysEMSkQGW1Nc7M7vBzHaaWbWZLemhvtTMXjKzLWa2xszmJNR928y2mtk2M3soofxRM6s1s03B66Z+2SI5K7GY85N/+4C7/2ENY0sKWPHgNer8RTJEn0cAZpYNPAl8GqgB1prZCnffntDsYWCTu99qZjOD9ouCRPBVYCHQCfzOzF5x9w+C9X7s7o/34/bIWWg60clDv9rEm+/Xc9tlE/n+rXMoytNBoUimSOYIYCFQ7e673b0TWAYs7tZmNvAGgLvvAKaa2ThgFvC2u5909wjwH8Ct/Ra9nLNNB47x2b/7PW/vauQHt17CE388V52/SIZJJgFMBA4kLNcEZYk2A7cBmNlCYApQAWwFrjOz0WZWBNwETEpY78Fg2OhpMyvt6Y+b2f1mts7M1tXX1ye1UXJm//TOfu5Y+hYAL3z9Kr5wxWTd0kEkAyWTAHrqGbzb8mNAqZltAr4JbAQi7v4e8EPgdeB3xBNFJFjnp8A0YB5wCHiipz/u7k+5e5W7V40ZMyaJcOVMfr35IA+/9C7XXFjGK9+6lksrRoYdkoiEJJlj/ho++q29AjiY2MDdm4F7ACz+VXJP8MLdfw78PKj7QfB+uHvdqfXN7GfAb851IyQ579Yc58/+ZTNVU0r5+y8vID9HD14XyWTJHAGsBaabWaWZ5QF3AisSG5jZyKAO4D7gzSApYGZjg+lk4sNEzwfLiZea3Ep8uEgGyJGWdr767DrKhuWzVJ2/iJDEEYC7R8zsQeBVIBt42t23mdkDQf1S4id7nzWzKLAduDfhLZab2WigC/iGux8Nyn9kZvOIDyftBb7WP5sk3bV3RfnaL9ZzvK2LF75+lZ7LKyJAkj8Ec/eVwMpuZUsT5lcD03tZ9xO9lH85+TDlXLk7D7/0Lhv3H2Ppl+Zz8QTdxllE4vQ7/yHuZ/+5mxc31PKd6y/ihjn6gZeIfEgJYAj79x1H+Ovf7uCPLinnW4suDDscEUkxSgBDVPWRFr71/EZmlw/n8Tvm6jp/EfkYJYAh6NjJTu59Zh35udn87CtVFObpih8R+TglgCGmKxrjG/+0gUPH2vn7Ly9gwsjCsEMSkRSlm78MMd//zXZWVTfy+B1zWTClx7triIgAOgIYUv7pnf08s3ofX/1EJZ9bUBF2OCKS4pQAhoi3dzfyvZe38qkZY1hy46ywwxGRNKAEMAQcaDrJ13+5nimji/jbuy4jO0tX/IhI35QA0lxrR4T7nllHzOH/3n05wwtyww5JRNKETgKnsVjM+c6vNlFd38oz9yyksqw47JBEJI3oCCCNPfH6Tl7fXsd//6NZXDu9LOxwRCTNKAGkqZc31fLkv+/iroWTuPvqqWGHIyJpSAkgDW0+cIz/9sIWFlaO4q9unqPbPIjIOVECSDN1ze3c/4t1jCnJ56dfnE9ejv4JReTc6CRwGonGnD99bgMt7RGWf/1qRuvBLiJyHpQA0shz7+xj/b6j/Pjzc5lVPjzscEQkzWn8IE3UNbfzo9/t5BPTy7hl3sSwwxGRIUAJIE381a+30RWN8f1bdNJXRPqHEkAaeOO9Ola+e5hvLZrOlNH6sZeI9A8lgBR3oiPC917exoxxJdx/3QVhhyMiQ4hOAqe4H7/+PrXH2lj+9avIzVa+FpH+ox4lhW2tPc7Tq/bwhSsms2DKqLDDEZEhRgkgRUVjzsMvvcuo4ny+e8PMsMMRkSFICSBFPbt6L1tqjvPIZ2czolC3eBaR/qcEkIIOHW/j8Vd38smLxvCZS8vDDkdEhiglgBT0yMvbiLrrmn8RGVBKACnm1W2HeW17HQ9dfxGTRhWFHY6IDGFKACmktSPCoyu2MXN8CfdeWxl2OCIyxOl3ACnkidd2cri5nSe/OF/X/IvIgEuqlzGzG8xsp5lVm9mSHupLzewlM9tiZmvMbE5C3bfNbKuZbTOzhxLKR5nZ62b2QTAt7ZctSlNbao7xzFt7+dIVU5g/OaN3hYgMkj4TgJllA08CNwKzgbvMbHa3Zg8Dm9z9UuArwE+CdecAXwUWAnOBz5jZ9GCdJcAb7j4deCNYzkiRaIy/ePFdyobl8+c3zAg7HBHJEMkcASwEqt19t7t3AsuAxd3azCbeiePuO4CpZjYOmAW87e4n3T0C/Adwa7DOYuCZYP4Z4Jbz2ZB09o9v7WXbwWYevflihhfomn8RGRzJJICJwIGE5ZqgLNFm4DYAM1sITAEqgK3AdWY22syKgJuAScE649z9EEAwHdvTHzez+81snZmtq6+vT26r0kjtsTb+5vX3+S8zx3LjnPFhhyMiGSSZBNDThejebfkxoNTMNgHfBDYCEXd/D/gh8DrwO+KJInI2Abr7U+5e5e5VY8aMOZtVU56788jLW3GH/7H4Yl3zLyKDKpmrgGr48Fs7xL/ZH0xs4O7NwD0AFu/F9gQv3P3nwM+Duh8E7wdQZ2bl7n7IzMqBI+exHWnp1W2H+bf3jvCXN82iolTX/IvI4ErmCGAtMN3MKs0sD7gTWJHYwMxGBnUA9wFvBkkBMxsbTCcTHyZ6Pmi3Arg7mL8bePl8NiTdNLd38ciKbcwuH84910wNOxwRyUB9HgG4e8TMHgReBbKBp919m5k9ENQvJX6y91kziwLbgXsT3mK5mY0GuoBvuPvRoPwx4J/N7F5gP3BHf21UOnji1Z0caengqS9XkaNr/kUkBEn9EMzdVwIru5UtTZhfDUzvvl5Q94leyhuBRUlHOoRs3H+UZ9/ex91XTWXupJFhhyMiGUpfPQdZV3DN/7iSAv7rH1wUdjgiksF0K4hB9g+r9rDjcAtLv7SAEl3zLyIh0hHAIDrQdJIfv/4B188axx9ePC7scEQkwykBDKIfrHwPM13zLyKpQQlgkNS3dPDa9jq+fNUUJowsDDscERElgMHy8qZaojHnjgUVYYciIgIoAQyaF9bXMLdiBBeOLQk7FBERQAlgUGw7eJwdh1u4Xd/+RSSFKAEMguXra8nNNj576YSwQxEROU0JYIB1RWO8vKmWRTPHUVqc1/cKIiKDRAlggL35fj2NJzq5bX73RyiIiIRLCWCALd9Qw6jiPD41o8fn3YiIhEYJYAAdO9nJv20/ws1zJ5CXo10tIqlFvdIA+vWWQ3RGY3xOV/+ISApSAhhAy9fXMGNcCRdPGB52KCIiH6MEMEB21bey6cAxbl8wUff9EZGUpAQwQJavryHL4JZ5uvpHRFKTEsAAiMWclzbWct1FYxg7vCDscEREeqQEMABW727k0PF2bp+vk78ikrqUAAbA8vU1lBTk8OnZeuiLiKQuJYB+1toR4bdbD/OZS8spyM0OOxwRkV4pAfSz3757iLauqIZ/RCTlKQH0s+Ubapg6uogFU0rDDkVE5IyUAPrRgaaTvL27idvmV+jafxFJeUoA/ehfN9YCcOtluvZfRFKfEkA/cXde3FjLlReMYtKoorDDERHpkxJAP9mw/yh7Gk7o5K+IpA0lgH7ywvpaCnOzufGS8rBDERFJihJAP2jvivKbLQe5Yc54huXnhB2OiEhSlAD6wevb62hpj2j4R0TSSlIJwMxuMLOdZlZtZkt6qC81s5fMbIuZrTGzOQl13zGzbWa21cyeN7OCoPxRM6s1s03B66b+26zB9eKGGspHFHDVtNFhhyIikrQ+E4CZZQNPAjcCs4G7zGx2t2YPA5vc/VLgK8BPgnUnAt8Cqtx9DpAN3Jmw3o/dfV7wWnneWxOCIy3tvPlBA7deNpHsLF37LyLpI5kjgIVAtbvvdvdOYBmwuFub2cAbAO6+A5hqZqfuhJYDFJpZDlAEHOyXyFPEyxsPEo05t+uxjyKSZpJJABOBAwnLNUFZos3AbQBmthCYAlS4ey3wOLAfOAQcd/fXEtZ7MBg2etrMerx3gpndb2brzGxdfX19Uhs1WNyd5RtqmDdpJNPGDAs7HBGRs5JMAuhpXMO7LT8GlJrZJuCbwEYgEnTqi4FKYAJQbGZfCtb5KTANmEc8OTzR0x9396fcvcrdq8aMGZNEuINn28Fmdhxu0bd/EUlLyVyzWANMSliuoNswjrs3A/cAWPwmOHuC1x8Ce9y9Pqh7Ebga+KW7151a38x+Bvzm3DcjHMs31JCXncVnL9W1/yKSfpI5AlgLTDezSjPLI34Sd0ViAzMbGdQB3Ae8GSSF/cCVZlYUJIZFwHvBOom95q3A1vPblMHVFY2xYtNBFs0ay8iivL5XEBFJMX0eAbh7xMweBF4lfhXP0+6+zcweCOqXArOAZ80sCmwH7g3q3jGzF4ANQIT40NBTwVv/yMzmER9O2gt8rR+3a8D9x856Gk906tp/EUlbSf1sNbhEc2W3sqUJ86uB6b2s+wjwSA/lXz6rSFPM8g01jC7O45MzUuu8hIhIsvRL4HNw7GQnb7x3hMXzJpKbrV0oIulJvdc5+PXmg3RGY9y+QPf9F5H0pQRwDl7YUMvM8SVcPGFE2KGIiJwzJYCzVH2klc0HjvE5XfsvImlOCeAsLd9QQ3aWcfO8CWGHIiJyXpQAzkI05vzrxlqum17G2JKCsMMRETkvSgBnYfWuRg4db9etH0RkSFACOAvLN9QwvCCH62eN67uxiEiKUwJIUmtHhN9tPcxn5k6gIDc77HBERM6bEkCSVr57iLauqG79ICJDhhJAkl7Zcogpo4uYP3lk2KGIiPQLJYAkdEZirNnTxKcuGkP8pqYiIulPCSAJG/cfpa0ryjUXloUdiohIv1ECSMKqXY1kGVxxweiwQxER6TdKAEl4q7qBSypGMqIwN+xQRET6jRJAH1o7Imw6cIxrpunbv4gMLUoAfVizp5FIzDX+LyJDjhJAH1ZVN5KXk8WCKaVhhyIi0q+UAPqwqrqBqiml+vWviAw5SgBn0NDawY7DLRr+EZEhSQngDFbvagRQAhCRIUkJ4Aze2tVASUEOl0zUox9FZOhRAjiDVdWNXHnBaLKzdPsHERl6lAB6caDpJPubTur6fxEZspQAerGqugHQ+L+IDF1KAL1YtauRsSX5XDh2WNihiIgMCCWAHrg7q3c1cPW00br9s4gMWUoAPdhZ10JDa6eGf0RkSFMC6MGqal3/LyJDnxJAD96qbqCyrJgJIwvDDkVEZMAklQDM7AYz22lm1Wa2pIf6UjN7ycy2mNkaM5uTUPcdM9tmZlvN7HkzKwjKR5nZ62b2QTBNibutdUVjvLOniat1+aeIDHF9JgAzywaeBG4EZgN3mdnsbs0eBja5+6XAV4CfBOtOBL4FVLn7HCAbuDNYZwnwhrtPB94IlkO3peYYrR0RDf+IyJCXzBHAQqDa3Xe7eyewDFjcrc1s4p047r4DmGpm44K6HKDQzHKAIuBgUL4YeCaYfwa45Vw3oj+tqm7EDK7S4x9FZIhLJgFMBA4kLNcEZYk2A7cBmNlCYApQ4e61wOPAfuAQcNzdXwvWGefuhwCC6dhz3Yj+tKq6gdnlwyktzgs7FBGRAZVMAujpQnjvtvwYUGpmm4BvAhuBSDCuvxioBCYAxWb2pbMJ0MzuN7N1Zrauvr7+bFY9a22dUTbuP6bhHxHJCMkkgBpgUsJyBR8O4wDg7s3ufo+7zyN+DmAMsAe4Htjj7vXu3gW8CFwdrFZnZuUAwfRIT3/c3Z9y9yp3rxozZkzyW3YO1u5tojMaUwIQkYyQTAJYC0w3s0ozyyN+EndFYgMzGxnUAdwHvOnuzcSHfq40syKL/6R2EfBe0G4FcHcwfzfw8vltyvlbtauB3Gzj8qkpcUGSiMiAyumrgbtHzOxB4FXiV/E87e7bzOyBoH4pMAt41syiwHbg3qDuHTN7AdgARIgPDT0VvPVjwD+b2b3EE8Ud/bpl5+Ct6kYum1xKUV6fu0VEJO0l1dO5+0pgZbeypQnzq4Hpvaz7CPBID+WNxI8IUsKxk51sPXichxZdFHYoIiKDQr8EDqze1Yg7XHOhLv8UkcygBBBYtauB4rxs5k4aGXYoIiKDQgkg8FZ1IwsrR5GbrV0iIplBvR1w6HgbuxtO6PJPEckoSgDo9s8ikpmUAIjf/nl0cR4zxpWEHYqIyKDJ+ATg7qza1cBV00aTlaXHP4pI5sj4BLCr/gR1zR0a/hGRjJPxCWBVdQMA10xTAhCRzKIEUN1ARWkhk0cXhR2KiMigyugEEI05b+9u1Ld/EclIGZ0AttYep7k9wjXTlQBEJPNkdAJYtSs+/q8HwItIJsroBPBWdSMzx5dQNiw/7FBERAZdxiaA9q4oa/c2cbXG/0UkQ2VsAtiw/ygdkZhu/ywiGStjE8Cq6gays4yFlaPCDkVEJBQZnAAamVsxgpKC3LBDEREJRUYmgOb2LrbUHNPtH0Qko2VkAnhndxMx1+2fRSSzZWQCWFXdQEFuFpdNHhl2KCIiocnIBPDWrgYunzqK/JzssEMREQlNxiWAIy3tvF/XquEfEcl4GZcAVu8KHv+oH4CJSIbLuATw+w8aGFGYy+wJw8MORUQkVBmVANydt3Y1ctUFo8nW4x9FJMNlVALY13iS2mNtuv2DiAgZlgBO3f5ZJ4BFRDIsAbxV3Uj5iAIqy4rDDkVEJHQZkwBiMeetXQ1cPa0MM43/i4hkTAJ473AzR092afxfRCSQVAIwsxvMbKeZVZvZkh7qS83sJTPbYmZrzGxOUD7DzDYlvJrN7KGg7lEzq02ou6lft6ybt6qD6/81/i8iAkBOXw3MLBt4Evg0UAOsNbMV7r49odnDwCZ3v9XMZgbtF7n7TmBewvvUAi8lrPdjd3+8X7akD7+vbmDamGLGDS8YjD8nIpLykjkCWAhUu/tud+8ElgGLu7WZDbwB4O47gKlmNq5bm0XALnffd54xn7XOSIw1e5r07V9EJEEyCWAicCBhuSYoS7QZuA3AzBYCU4CKbm3uBJ7vVvZgMGz0tJmV9vTHzex+M1tnZuvq6+uTCPfjNh04RltXVAlARCRBMgmgp0tmvNvyY0CpmW0CvglsBCKn38AsD7gZ+JeEdX4KTCM+RHQIeKKnP+7uT7l7lbtXjRkzJolwP25VdQNZBldeoBPAIiKn9HkOgPg3/kkJyxXAwcQG7t4M3ANg8Wss9wSvU24ENrh7XcI6p+fN7GfAb842+GRNGFnAHQsmMaJQj38UETklmQSwFphuZpXET+LeCXwhsYGZjQROBucI7gPeDJLCKXfRbfjHzMrd/VCweCuw9Zy2IAmfv3wyn7988kC9vYhIWuozAbh7xMweBF4FsoGn3X2bmT0Q1C8FZgHPmlkU2A7ce2p9MysifgXR17q99Y/MbB7x4aS9PdSLiMgAMvfuw/mpq6qqytetWxd2GCIiacXM1rt7VffyjPklsIiIfJQSgIhIhlICEBHJUEoAIiIZSglARCRDKQGIiGSotLoM1MzqgYG8mVwZ0DCA799fFGf/Spc4IX1iVZz973xineLuH7uXTlolgIFmZut6ulY21SjO/pUucUL6xKo4+99AxKohIBGRDKUEICKSoZQAPuqpsANIkuLsX+kSJ6RPrIqz//V7rDoHICKSoXQEICKSoZQAREQyVEYlADObZGb/bmbvmdk2M/t2D20+ZWbHzWxT8PpeGLEGsew1s3eDOD52H2yL+1szqw6erTw/hBhnJOyrTWbWbGYPdWsTyj4NnjV9xMy2JpSNMrPXzeyDYNrbs6hvMLOdwb5dElKs/8vMdgT/ti8FD17qad0zfk4GIc5Hzaw24d/3pl7WHbR92kucv0qIcW/wCNue1h3M/dljnzRon1N3z5gXUA7MD+ZLgPeB2d3afAr4TdixBrHsBcrOUH8T8Fviz22+Engn5HizgcPEf3QS+j4FrgPmA1sTyn4ELAnmlwA/7GU7dgEXAHnA5u6fk0GK9Q+AnGD+hz3FmsznZBDifBT4syQ+G4O2T3uKs1v9E8D3UmB/9tgnDdbnNKOOANz9kLtvCOZbgPeAieFGdV4WA8963NvASDMrDzGeRcAudx/IX2snzd3fBJq6FS8GngnmnwFu6WHVhUC1u+/2+GNOlwXrDZieYnX319w9Eiy+Tfx53KHqZZ8mY1D36ZniDJ5b/sd0e0xtGM7QJw3K5zSjEkAiM5sKXAa800P1VWa22cx+a2YXD25kH+HAa2a23szu76F+InAgYbmGcBPanfT+nypV9uk4D55FHUzH9tAm1fYrwJ8QP9rrSV+fk8HwYDBU9XQvwxWptE8/AdS5+we91IeyP7v1SYPyOc3IBGBmw4DlwEP+0YfXA2wgPoQxF/g74F8HObxE17j7fOBG4Btmdl23euthnVCu6zWzPOBm4F96qE6lfZqMlNmvAGb2l0AEeK6XJn19TgbaT4FpwDzgEPHhle5SaZ/exZm//Q/6/uyjT+p1tR7KzmqfZlwCMLNc4jv6OXd/sXu9uze7e2swvxLINbOyQQ7zVCwHg+kR4CXih3yJaoBJCcsVwMHBie5jbgQ2uHtd94pU2qdA3alhsmB6pIc2KbNfzexu4DPAFz0Y+O0uic/JgHL3OnePunsM+Fkvfz8l9qmZ5QC3Ab/qrc1g789e+qRB+ZxmVAIIxv5+Drzn7n/TS5vxQTvMbCHxfdQ4eFGejqPYzEpOzRM/Ibi1W7MVwFcs7krg+KnDxhD0+q0qVfZpYAVwdzB/N/ByD23WAtPNrDI4srkzWG9QmdkNwHeBm939ZC9tkvmcDKhu551u7eXvp8Q+Ba4Hdrh7TU+Vg70/z9AnDc7ndDDOdKfKC7iW+CHSFmBT8LoJeAB4IGjzILCN+Bn1t4GrQ4r1giCGzUE8fxmUJ8ZqwJPErwR4F6gKKdYi4h36iISy0Pcp8YR0COgi/m3pXmA08AbwQTAdFbSdAKxMWPcm4ldk7Dq170OItZr4GO+pz+rS7rH29jkZ5Dh/EXz+thDvgMrD3qc9xRmU/+Opz2VC2zD3Z2990qB8TnUrCBGRDJVRQ0AiIvIhJQARkQylBCAikqGUAEREMpQSgIhIhlICEBHJUEoAIiIZ6v8DJ1FqG6n3TwAAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# plotting variance ratio\n",
    "pd.Series(var_ratio).plot()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "1cf017a7",
   "metadata": {},
   "source": [
    "Kneed algorithm to find the elbow point"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "id": "5970696c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Figure size 360x360 with 0 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAGDCAYAAAA8mveiAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAA2v0lEQVR4nO3deZhU1bnH++8rM6iA0BpkFhEFBAREccwRVDAqkegVE5zimIgGvdEY40nUGGeNQzxOJ6BirhpRET2oIBqHiCLQzQwyK0KYEbCZee8fezcUTTXdRdWu3dX1+zxPPVW1h6pfFU2/vdbae21zd0RERCpqv7gDiIhIblHhEBGRlKhwiIhISlQ4REQkJSocIiKSEhUOERFJiQqHSI4zs+lm9uO4c0j+UOGQvGdmC82sd8LzAWa2xsxOjSHHRjPbYGbLzGyome1f3n7u3sHd/5XCe/Quf0uRsqlwiCQws0uBJ4GfuPvHMUQ4x933B7oCxwK3x5BBZK9UOERCZnY18DBwprt/Hi5rZWZuZpea2TdmttLM/pCwz35mdquZzTOzVWb2TzM7KGH98Wb2uZmtNbPJFe1ScvfvgHeBjuHrnBt2Sa01s3+Z2VEJ77GzFWFmd4QZXjSz9eE+3cN1w4AWwNthq+aW9L4xyVcqHCKBXwF/Bnq5+4Qk608C2gG9gD8m/OK+AfgpcCpwKLCGoMWCmTUF/g+4GzgI+C3wupkVlBfGzJoDZwGFZnYE8DIwGCgARhH88q9Zxu7nAq8ADYCRwN8A3P1i4BvCVo27P1BeDpFkVDhEAqcDXwBTy1h/p7tvdPfJwGSgc7j8GuAP7r7Y3TcDdwDnm1l1YCAwyt1HufsOdx8DTCAoCGUZYWZrgc+Aj4F7gAuB/3P3Me6+FXgIqAOcUMZrfBa+53ZgWEJWkYxQ4RAJXAscAfyvmVmS9f9JeFwMlAxatwTeDLuQ1gIzge3AIeG6C0rWhetPAprsJcdP3b2Bu7d091+7+0aClsyikg3cfQfwLdC0jNconbV2WMhEMkKFQySwnKAb6mTgf1LY71ugb/jLvuRWOxyj+BYYVmpdPXe/L8VsSwiKEABhYWsOfJfi6wBoOmxJmwqHSMjdlwCnAX3M7K8V3O1p4C9m1hLAzArMrF+47iXgHDM708yqmVltM/uxmTVLMdo/gZ+YWS8zqwH8v8Bm4PMUXwdgGXDYPuwnspMKh0gCd/+WoHicb2b3VmCXxwgGoEeb2XqCcZLjEl6rH3AbsIKgBXIzKf6/c/fZBOMlTwArgXMIBri3pPI6oXuB28Ous9/uw/4imC7kJCIiqVCLQ0REUqLCISIiKVHhEBGRlKhwiIhISlQ4REQkJXlxNmnjxo29VatWcccQEckpEydOXOnue8ytlheFo1WrVkyYkGzeOhERKYuZLUq2XF1VIiKSEhUOERFJiQqHiIikJC/GOJLZunUrixcvZtOmTXFHyQu1a9emWbNm1KhRI+4oIpKmvC0cixcv5oADDqBVq1Ykv/yCZIq7s2rVKhYvXkzr1q3jjiMiacrbrqpNmzbRqFEjFY0sMDMaNWqk1p1IFZG3hQNQ0cgifdciVUdeF47K5I477uChhx4qc/2IESOYMWNGFhOJiCSnwpEjVDhEpLJQ4YjRX/7yF9q1a0fv3r2ZPXs2AM899xzHHnssnTt35mc/+xnFxcV8/vnnjBw5kptvvpkuXbowb968pNuJiGRD3h5VlWjwYCgqyuxrdukCjz5a9vqJEyfyyiuvUFhYyLZt2+jatSvdunWjf//+XHXVVQDcfvvt/P3vf+f666/n3HPP5eyzz+b8888HoEGDBkm3ExGJmgpHTD799FPOO+886tatC8C5554LwLRp07j99ttZu3YtGzZs4Mwzz0y6f0W3ExHJNBUO9t4yiFKyI40uu+wyRowYQefOnXn++ef517/+lXTfim5X2Vz4zDgAXr2mZ8xJRGRfaYwjJqeccgpvvvkmGzduZP369bz99tsArF+/niZNmrB161b+8Y9/7Nz+gAMOYP369Tufl7WdiEjU1OKISdeuXbnwwgvp0qULLVu25OSTTwbgz3/+M8cddxwtW7bk6KOP3lksBgwYwFVXXcXjjz/O8OHDy9yusmtSv3bcEUQkTebucWeIXPfu3b309ThmzpzJUUcdFVOi/KTvXCS3mNlEd+9eerm6qkREJCUqHJJVd749nTvfnh53DBFJg8Y4JKtmLFkXdwQRSZNaHCIikhIVDhERSYkKh4iIpCSywmFmQ8xsuZlNK2O9mdnjZjbXzKaYWdeEdX3MbHa47taE5QeZ2RgzmxPeN4wqfzYsXLiQjh07Rvoe48aN2zmnVXkZnn/+eQYNGhRpnsMK6nFYQb1I30NEohVli+N5oM9e1vcF2oa3q4GnAMysGvBkuL49cJGZtQ/3uRUY6+5tgbHhc9mL9957jz599vbPkF339u/Evf07xR1DRNIQWeFw90+A1XvZpB/woge+ABqYWROgBzDX3ee7+xbglXDbkn1eCB+/APw0kvAxmD9/PscccwwPPvgg/fv3p0+fPrRt25Zbbrll5zajR4+mZ8+edO3alQsuuIANGzYAwUy7p556Kt26dePMM89k6dKlO/cZO3YsvXv3Zvv27dx8880ce+yxdOrUiWeeeSZpjm+//ZY+ffrQrl077rzzzp3LH3nkETp27EjHjh15NJzc64EHHuDxxx8H4MYbb+S0007b+Z4DBw7M6PcjIpVHnIfjNgW+TXi+OFyWbPlx4eND3H0pgLsvNbODMxWmZPK9RGd3asLFPVuxcct2Lhs6fo/153drxgXdm7P6hy386qWJu61LZRK/2bNnM2DAAIYOHUpRURFFRUUUFhZSq1Yt2rVrx/XXX0+dOnW4++67+eCDD6hXrx73338/jzzyCL///e+5/vrreeuttygoKODVV1/lD3/4A0OGDGHlypXUqFGD+vXr8+yzz1K/fn2++uorNm/ezIknnsgZZ5yxx0SL48ePZ9q0adStW5djjz2Wn/zkJ5gZQ4cO5csvv8TdOe644zj11FM55ZRTePjhh7nhhhuYMGECmzdvZuvWrXz22Wc7p1Ap7fdvTAFQq0ME2LIF1q+HDRv2vN+wATZtCrbZvDm4JXtc3vonn4QTTshs7jgLR7KLUPtelqf24mZXE3SB0aJFi1R3z5oVK1bQr18/Xn/9dTp06EBRURG9evWifv36ALRv355Fixaxdu1aZsyYwYknngjAli1b6NmzJ7Nnz2batGmcfvrpAGzfvp0mTZoAQQvljDPO2Pl4ypQpDB8+HIDvv/+eOXPmcMQRR+yW5/TTT6dRo0YA9O/fn88++wwz47zzzqNevXo7l3/66af86le/YuLEiaxfv55atWrRtWtXJkyYwKeffrqzJVLa/BU/ZPLrE8m6bdvg++9h3brgPtlt3bqgACQWgWQFYuvW1N+/enWoVSu41axZ9uMGDXY9z7Q4C8dioHnC82bAEqBmGcsBlplZk7C10QRYXtaLu/uzwLMQzFVVXpi9tRDq1Ky21/UH1au5z9OE169fn+bNm/Pvf/+bDh06AFAr4V+6WrVqbNu2DXfn9NNP5+WXX95t/6lTp9KhQwfGjduzxfTuu+9y0003AeDuPPHEE3tct2PhwoW7PS/dAjEzyprPrEaNGrRq1YqhQ4dywgkn0KlTJz766CPmzZunOamk0tqyBdas2XVbu3bP52vX7l4EEotCRS62WasWHHgg7L9/cDvgAKhfH5o12/U8cV2y+3r1oE6d3YtCzZqwXyU4FjbOwjESGGRmrxB0RX0fFoQVQFszaw18BwwAfp6wz6XAfeH9W9mPnVk1a9ZkxIgRnHnmmey///5lbnf88cdz3XXXMXfuXA4//HCKi4tZvHgx7dq1Y8WKFYwbN46ePXuydetWvv76a9q3b8+UKVPo0qULAGeeeSZPPfUUp512GjVq1ODrr7+madOme7zPmDFjWL16NXXq1GHEiBEMGTKE/fbbj8suu4xbb70Vd+fNN99k2LBhQDA9/EMPPcSQIUM4+uijuemmm+jWrVvSa42IZMqOHcEv8VWrgtvq1bvuV6/ee1Eo7xd/3brBX+v16we//OvXhxYtgvvEW8m6ZMuj+Cu/MomscJjZy8CPgcZmthj4E1ADwN2fBkYBZwFzgWLg8nDdNjMbBLwPVAOGuHvJ5Eb3Af80syuAb4ALosqfTfXq1eOdd97h9NNPL3NQuaCggOeff56LLrqIzZs3A3D33XdzxBFHMHz4cG644Qa+//57tm3bxuDBg9m4cSPHHHPMzl/gV155JQsXLqRr1664OwUFBYwYMWKP9znppJO4+OKLmTt3Lj//+c/p3j2YGPOyyy6jR48eO1/rmGOOAeDkk0/mL3/5Cz179qRevXrUrl27zPENkWSKi2Hlyl1FoHQxKF0YVq0KisCOHWW/5oEHQsOGu25HHLH784YNg+KQbFnNmtn65LlL06pXUXfffTeHH344AwYMiDvKTjNnzuSfc4P/7X86p0PMaSTT3OGHH4Jf7CtX7ioGe7tfuTIYAC7L/vtDo0bB7aCDdr9P9vigg4ICUK1a9j53VVbWtOqa5LCKuv322+OOkJQKRu5wDwZxly+HFSv2vC+9bOXK4CieZMyCX+iNGwe/5Js3hy5dguclyxo33rMQ6K//ykmFQyQPbdwICxbAvHnB7dtvkxeFsgpBvXpQUAAHHwyHHrp7EUgsBCX3agVULSocklWDXykE4NEBx8ScpGpzD7qD5s/fVRxKbvPnw3ff7b59nTpBESgogB/9CI4+etfz0vcFBcEAsuSvvC4c7q6jf7KkZCxt6fd76dCWlC1fDlOnJi8Q60pd+uTQQ+Gww6B3b2jTJrgddlhw37hx0J0kUhF5Wzhq167NqlWraNSokYpHxNydVatWUbt27bij5LRVq2DiRJgwIbhNnAjffLNrfY0a0Lp1UAhOPHH3wtC6tVoJkjl5WziaNWvG4sWLWbFiRdxR8kLt2rVp1qwZ8J+4o+SE1ath0qTdi0TiuZpt2wbF4Te/CcYXDj8cmjbVOIJkR94Wjho1atC6deu4Y4iwdu2eRWL+/F3r27SB446DX/8auneHrl2DE81E4pK3hUPi0bVlTl9CJW3uwfjDJ58Et3//G+bO3bW+dWvo1g2uvnpXkWiY31+ZVEIqHJJVv+tzZNwRsmrHDpgxY1eh+OQTKJn1vnFjOOkk+OUvg2LRrVtw+KpIZafCIZJB27ZBUdGuIvHpp8F4BQRjEP/1X3DKKcHtyCN1JJPkJhUOyaprhwXXLXn64m4xJ8mMTZvgq6+CAlHS9RReX4vDD4ef/nRXoWjVSoVCqgYVDsmqNcVb4o6QtlmzYORIGDUKvvhi19nVHTvCJZcEReLkk4PzJkSqIhUOkXJs3x4UiLfeCm5ffx0s79IFrrsuKBQnnaTxCckfKhwiSRQXw5gxQaF4551g7qbq1YMxihtugHPOCa7RIJKPVDhEQsuXB0XirbeCorFxY3C+xFlnwbnnQt++On9CBFQ4JMtOPLxx3BF2M3v2ri6oceOC8yyaN4crroB+/YJuKE3tLbI7FQ7Jqht6tY31/d1h/Hh4442gWMyeHSw/5hj405+ClkWXLjr6SWRvVDikytuxAz7/HIYPDwrGt9/uGq8YNCgoFhqvEKk4FQ7JqkuHjAfghV/2iPR9tm0Lzqt4/fWgWPznP1CrFpx5Jtx9dzC4rak8RPaNCodk1aat2yN77S1b4MMPg2IxYkRwKdM6dYLB7fPPh5/8BA44ILK3F8kbKhyS0zZtCo6AGj48OClv7VrYf/+gRfGzn0GfPsFlTkUkc1Q4JOcUF8O77wYti3fegfXroUGDYKzi/PPh9NNB14wSiY4Kh+SM9evhkUfg4YeDx40bw4UXBi2L007TYbMi2aLCIVnV66iDU95nyxZ45hn485+DM7j799811Ud1/QSLZJ3+20lWXX1Kmwpvu2MHvPwy/Pd/w4IF8OMfw/33Q49oD8gSkXLsF3cAkdLc4b33gqvfDRwIBx4YjGl8+KGKhkhloMIhWXXhM+O48JlxZa7/8stgvKJvX1i3Dv7xj+B63H366GxukcpChUMqhVmzgkHu44+H6dPhiSeCZT//Oeynn1KRSkVjHBKr776DO++EIUOCk/XuuANuukkn6olUZiocEos1a4KB7sceCy6UdN118Ic/wMGpH3QlIlmmwiFZtW3Lfsz96Ee0+X1wlvcvfgF33QWtW8edTEQqSoVDsmbBAvjk3q6sXFqDvn3h3nuhc+e4U4lIqlQ4JCsWL4ZevWDH5hqMHRscOSUiuUmFQyK3fDn07h3MVjvq/e106wZQLe5YIrKPdKCjRGr16mDSwW++gf/7P3hiynguGzo+7lgikgYVDonM+vXBiXyzZgXXxzj55LgTiUgmqKtKIlFcHFwTY+LEYPrzM86IO5GIZIoKh2Tc5s3BWeCffBJMGdKvX9yJRCSTVDgko7ZtC6YJee89+N//hYsuijuRiGSaCodkzI4dcPnl8MYb8OijcMUVe25zfrdmWc8lIpmlwiEZ4Q6//jW89BL85S/wm98k3+6C7s2zG0xEMk5HVUna3OG3vw2u0vf738Ntt5W97eoftrD6hy3ZCyciGacWh6TtzjuDa4Fff33Q2tibX700EYBXr+mZhWQiEgW1OCQtDz4YFI7LLw/GNXSxJZGqT4VD9tn//A/ccgtceCE895wuuCSSLyL9r25mfcxstpnNNbNbk6xvaGZvmtkUMxtvZh0T1v3GzKaZ2XQzG5yw/A4z+87MisLbWVF+BknuxReDa2iccw4MGwbVNPWUSN6IrHCYWTXgSaAv0B64yMzal9rsNqDI3TsBlwCPhft2BK4CegCdgbPNrG3Cfn919y7hbVRUn0GSe/31oGuqVy/45z+hRo24E4lINkXZ4ugBzHX3+e6+BXgFKH0OcXtgLIC7zwJamdkhwFHAF+5e7O7bgI+B8yLMKhU0alRwUl/PnvDWW1C7dmr7Dzy+JQOPbxlNOBHJiigLR1Pg24Tni8NliSYD/QHMrAfQEmgGTANOMbNGZlYXOAtIPAFgUNi9NcTMGiZ7czO72swmmNmEFStWZOYT5bmPPgqmEjn66GCm23r1Un+NczofyjmdD818OBHJmigLR7Lja7zU8/uAhmZWBFwPFALb3H0mcD8wBniPoMBsC/d5CmgDdAGWAg8ne3N3f9bdu7t794KCgvQ+ifCf/8B550GbNvD++1C//r69zpK1G1mydmNmw4lIVkV5Hsdidm8lNAOWJG7g7uuAywHMzIAF4Q13/zvw93DdPeHr4e7LSvY3s+eAdyL7BLLTzTfDxo3w5pvQuPG+v86NrxYBOo9DJJdF2eL4CmhrZq3NrCYwABiZuIGZNQjXAVwJfBIWE8zs4PC+BUF31svh8yYJL3EeQbeWROjjj4OpRG65Bdq2LX97EanaImtxuPs2MxsEvE9wndAh7j7dzK4N1z9NMAj+opltB2YAidPivW5mjYCtwHXuviZc/oCZdSHo9loIXBPVZxDYujU47LZVq2A6ERGRSKccCQ+VHVVq2dMJj8cBSf+Gdfek14tz94szmVH27vHHYfr04AiqunXjTiMilYHO9ZUyffcd3HEHnH02nHtu3GlEpLLQJIdSpptuCi7M9PjjmXvNq04+LHMvJiKxUOGQpD74IDgr/K67oHXrzL1u7/aHZO7FRCQW6qqSPWzeHAyIt2kTHIabSfNWbGDeig2ZfVERySq1OGQPjzwCX38N776b+pQi5bntjamAzuMQyWVqcchuFi2CP/8Z+veHPn3iTiMilZEKh+xm8ODgYkx//WvcSUSkslJXlew0ahSMGAH33gstWsSdRkQqK7U4BIBNm+CGG+DII4PDcEVEyqIWhwBw//0wb15wGG7NmuVvv6+uP02TXYnkOhUOYf78oHvqwguDq/pF6aS2aUytKyKVgrqq8px70EVVowY8nPTKJpk1fcn3TF/yffRvJCKRUYsjz40cGVzN7+GHoWnp6zNG4K63ZwA6j0Mkl6nFkceKi+E3v4EOHeD66+NOIyK5Qi2OPHbPPcEJfx9/HHRViYhUhFoceerrr+HBB+Hii+GUU+JOIyK5RIUjD7nDoEHBPFQPPhh3GhHJNeqqykPDh8OYMcF1Ng7J8iznt/Rpl903FJGMU+HIMxs2wI03Qpcu8KtfZf/9u7U8KPtvKiIZpcKRZ+66K7gk7GuvQfUY/vUnLloNqICI5DKNceSR6dODWW9/+UvoGdNpFA+8N5sH3psdz5uLSEaocOQJ9+CqfgccAPfdF3caEcll6qrKEy+/HJyv8fTTUFAQdxoRyWVqceSBLVvgd7+D7t3hyivjTiMiuU4tjjzw8suweDE89xxUqxZ3GhHJdSocVZx7cJLf0UfDmWfGnQb+eE77uCOISJpUOKq4d98NjqZ68cXgWuJx63Bo/bgjiEiaNMZRxT34IDRvDgMGxJ0k8NmclXw2Z2XcMUQkDWpxVGHjx8O//hVca6OyzH77xIdzAF0JUCSXqcVRhT34INSvD1ddFXcSEalKVDiqqLlz4Y034Ne/Dk76ExHJFBWOKuqRR4K5qHRlPxHJNBWOKmj5chg6FC65BJo0iTuNiFQ1Ghyvgp58EjZvht/+Nu4ke7qn/9FxRxCRNKlwVDE//AB/+xucey60q4TXTGpTsH/cEUQkTeqqqmKGDIHVq+GWW+JOktwHM5bxwYxlcccQkTSoxVGFbNsWDIqfcEJwq4ye+3Q+AL3bZ/matSKSMSocVcjw4bBwITz6aNxJRKQqU1dVFVEymWG7dnDOOXGnEZGqTC2OKuLDD2HSpGDq9P3054CIREi/YqqIBx6AH/0IBg6MO4mIVHVqcVQBkyfD6NFwzz1Qu3bcafburxd2iTuCiKRJhaMKePBB2H9/uPbauJOU79AGdeKOICJpUldVjlu0CF55Ba6+Gho2jDtN+d6evIS3Jy+JO4aIpCHSwmFmfcxstpnNNbNbk6xvaGZvmtkUMxtvZh0T1v3GzKaZ2XQzG5yw/CAzG2Nmc8L7HPh1GZ1HHw2u7Dd4cNxJKualLxbx0heL4o4hImmIrHCYWTXgSaAv0B64yMxKX3D6NqDI3TsBlwCPhft2BK4CegCdgbPNrG24z63AWHdvC4wNn+elNWuCo6guuii4yp+ISDZE2eLoAcx19/nuvgV4BehXapv2BL/8cfdZQCszOwQ4CvjC3YvdfRvwMXBeuE8/4IXw8QvATyP8DJXaU08Fc1NVxskMRaTqirJwNAW+TXi+OFyWaDLQH8DMegAtgWbANOAUM2tkZnWBs4CSv6kPcfelAOH9wZF9gkps0yZ47DHo0wc6dYo7jYjkkyiPqrIky7zU8/uAx8ysCJgKFALb3H2mmd0PjAE2EBSYbSm9udnVwNUALVq0SC15DnjxxeC6G5V1MkMRqbqiLByL2dVKgKAlsdvhNO6+DrgcwMwMWBDecPe/A38P190Tvh7AMjNr4u5LzawJsDzZm7v7s8CzAN27dy9dsHLa9u3w8MPQrRv8+Mdxp0nNUwO7xR1BRNIUZVfVV0BbM2ttZjWBAcDIxA3MrEG4DuBK4JOwmGBmB4f3LQi6s14OtxsJXBo+vhR4K8LPUCmNHAlffx20NixZu64SO6heTQ6qV7P8DUWk0oqsxeHu28xsEPA+UA0Y4u7TzezacP3TBIPgL5rZdmAGcEXCS7xuZo2ArcB17r4mXH4f8E8zuwL4Brggqs9QGbkH04u0bg39+8edJnWvTQiGvS7orsPARHJVpGeOu/soYFSpZU8nPB4HtC29X7ju5DKWrwJ6ZTBmTvn3v+GLL4Kr/FXPwfP+h08MehxVOERyl84czzEPPACNGsHll8edRETylQpHDpkxA95+GwYNgrp1404jIvlKhSOHPPww1KkD110XdxIRyWcVLhxm1tLMeoeP65jZAdHFktKWLIFhw4IuqoKCuNOISD6r0PCqmV1FcDLdQUAbgnMyniaPB6mz7fHHg/M3brop7iTpef7yHnFHEJE0VbTFcR1wIrAOwN3nkKdTfcRh3bpgXqrzz4c2beJOk546NatRp2a1uGOISBoqWjg2hxMVAmBm1dlz+hCJyLPPBsXj5pvjTpK+YeMWMmzcwrhjiEgaKlo4Pjaz24A6ZnY68BrwdnSxJNHQoXDyydC9e9xJ0vfOlKW8M2Vp3DFEJA0VLRy3AisIJiK8huCkvtujCiW7LF4cHIbbr/SE9CIiManoucd1CKYMeQ52XqSpDlAcVTAJjBkT3J9+erw5RERKVLTFMZagUJSoA3yQ+ThS2ujRcMghcPTRcScREQlUtHDUdvcNJU/Cxzp3OWI7dsAHHwStjVybBVdEqq6KdlX9YGZd3X0SgJl1AzZGF0sAiopg5Uo444y4k2TOq9f0jDuCiKSpooVjMPCamZVciKkJcGEkiWSnkvGN3r3jzSEikqhChcPdvzKzI4F2BJeEneXuWyNNJoweHYxtNGkSd5LMefaTeQBcfUqOn8koksdSmeTwWKATcAxwkZldEk0kASguhs8+q1rdVABjZy5n7MykV/sVkRxR0bmqhhHMUVUEbA8XO/BiNLHkk09gyxYdhisilU9Fxzi6A+3dXdOMZMno0VCrVnDGuIhIZVLRrqppwI+iDCK7GzMGTjpJF2wSkcqnoi2OxsAMMxsPbC5Z6O7nRpIqzy1ZAtOmwcUXx50k82rX0My4IrmuooXjjihDyO4+CM/Jr4rjGy/8UtfjEMl1FT0c9+Oog8guo0cHV/nr3DnuJCIie6rQGIeZHW9mX5nZBjPbYmbbzWxd1OHyUeI0I/tVwSvCPz52Do+PnRN3DBFJQ0V/Nf0NuAiYQzDB4ZXhMsmwqVNh2bKq2U0F8O+5K/n33JVxxxCRNFR0jAN3n2tm1dx9OzDUzD6PMFfe0jTqIlLZVbRwFJtZTaDIzB4AlgL1oouVv0aPhvbtoWnTuJOIiCRX0a6qi8NtBwE/AM2B/lGFylcbNwZnjFe1aUZEpGqpaOH4qbtvcvd17n6nu98EnB1lsHz02WeweXPV7qZqWLcmDevWjDuGiKShol1VlwKPlVp2WZJlkobRo6FGDTj11LiTROfpi7vFHUFE0rTXwmFmFwE/B1qb2ciEVQcCq6IMlo9Kphmpp9EjEanEymtxfE4wEN4YeDhh+XpgSlSh8tGyZTB5MtxzT9xJonX/e7MA+F2fI2NOIiL7aq+Fw90XAYvMrDew0d13mNkRwJHA1GwEzBcl04xU9YHxSYvWxB1BRNJU0cHxT4DaZtYUGAtcDjwfVah8NHo0NGoExxwTdxIRkb2raOEwdy8mOAT3CXc/D2gfXaz84h6Mb/TuXTWnGRGRqqXChcPMegK/AP4vXFbhs85l76ZPh6VLq343lYhUDRX95T8Y+D3wprtPN7PDgI8iS5VnRo8O7qvy+RslmtSvHXcEEUmT5cPVYLt37+4TJkyIO0aZ+vaFhQth5sy4k4iI7GJmE929e+nl5Z3H8ai7Dzazt4E9KoyuAJi+TZvg44/hyivjTiIiUjHldVUNC+8fijpIvvr882COqnwZ37jz7ekA/OmcDjEnEZF9Vd55HBPD+4/NrCB8vCIbwfLF6NFQvXrVnmYk0Ywluv6XSK7b61FVFrjDzFYCs4CvzWyFmf0xO/GqvjFj4IQT4IAD4k4iIlIx5R2OOxg4ETjW3Ru5e0PgOOBEM7sx6nBV3YoVMGlS/nRTiUjVUF7huAS4yN0XlCxw9/nAwHCdpKFkmpF8OAxXRKqO8gbHa7j7HheIdvcVZlYjokx5Y8wYaNgQuuXRTOOHFWjqX5FcV16LY8s+rgPAzPqY2Wwzm2tmtyZZ39DM3jSzKWY23sw6Jqy70cymm9k0M3vZzGqHy+8ws+/MrCi8nVVejsrIPRgY79ULqlWLO0323Nu/E/f27xR3DBFJQ3mFo7OZrUtyWw8cvbcdzawa8CTQl2Beq4vMrPT8VrcBRe7eiaDr67Fw36bADUB3d+8IVAMGJOz3V3fvEt5GVfCzViqzZsF332l8Q0RyT3mH46bzt3APYG44JoKZvQL0A2YkbNMeuDd8r1lm1srMDknIVsfMtgJ1gSVpZKl08mmakUS/fyO4jItaHSK5K8q5WJsC3yY8XxwuSzSZYMZdzKwH0BJo5u7fEZx0+A3BhaS+d/fRCfsNCru3hphZw6g+QJTGjIG2baFVq7iTZNf8FT8wf8UPcccQkTREWTgsybLS05bcBzQ0syLgeqAQ2BYWg35Aa+BQoJ6ZDQz3eQpoA3QhKCoPk4SZXW1mE8xswooVleucxc2b4aOP1E0lIrkpysKxGGie8LwZpbqb3H2du1/u7l0IxjgKgAVAb2CBu69w963AG8AJ4T7L3H27u+8AniPoEtuDuz/r7t3dvXtBQUGGP1p6xo2D4uL866YSkaohysLxFdDWzFqbWU2Cwe2RiRuYWYNwHcCVwCfuvo6gi+p4M6trZgb0AmaG+zRJeInzgGkRfoZIjBkTHEn1X/8VdxIRkdRFdjEmd99mZoOA9wmOihoSXsvj2nD908BRwItmtp1g0PyKcN2XZjYcmARsI+jCejZ86QfMrAtBt9dC4JqoPkNURo+G44+HAw+MO0n2tT80Dz+0SBWj63Fk2apVUFAAd9wBf9SMXyJSiZV1PQ5d4TrLxo4NTv7T+IaI5CoVjiwbMwbq14djj407STwGv1LI4FcK444hImmIbIxD9pQ4zUj1PP3ml36/Ke4IIpImtTiyaM4c+OYbdVOJSG5T4ciikmlGdOKfiOQyFY4sGj0aDjssuImI5Ko87WnPvq1bg2lGBg4sf9uqrGvLnJxaTEQSqHBkyRdfwIYN6qb6XZ8j444gImlSV1WWjBkD++2naUZEJPepcGTJ6NFw3HHQoEHcSeJ17bCJXDtsYtwxRCQNKhxZsGYNfPWVDsMFWFO8hTXF5V51WEQqMRWOLPjwQ9ixQ+MbIlI1qHBkwejRcMAB0CPplUNERHKLCkfESqYZOe00qFEj7jQiIunT4bgRmzcPFi6Em2+OO0nlcOLhjeOOICJpUuGI2Jgxwb0GxgM39GobdwQRSZO6qiL24YfQogUcfnjcSUREMkOFI2ITJwaXiTWLO0nlcOmQ8Vw6ZHzcMUQkDSocEVqzBhYsgGOOiTtJ5bFp63Y2bd0edwwRSYMKR4SKioJ7FQ4RqUpUOCJUGF4hVYVDRKoSFY4IFRZC06Zw8MFxJxERyRwdjhuhwkK1NkrrdZSqqEiuU+GISHExzJwJ/fvHnaRyufqUNnFHEJE0qasqIlOnBhMbqsUhIlWNCkdENDCe3IXPjOPCZ8bFHUNE0qDCEZHCQmjYEFq2jDuJiEhmqXBEZNKkoLWhM8ZFpKpR4YjA1q3BGIe6qUSkKlLhiMCsWbB5swqHiFRNOhw3AiUD4127xpujMjq7U5O4I4hImlQ4IlBYCHXrwhFHxJ2k8rm4Z6u4I4hImtRVFYFJk6BTJ6hWLe4klc/GLdvZuEWz44rkMhWODNuxI5gVV+MbyV02dDyXDdX1OERymQpHhi1YAOvWaXxDRKouFY4M0xnjIlLVqXBk2KRJUL06dOwYdxIRkWiocGRYYSG0bw+1asWdREQkGjocN8MKC6Fv37hTVF7nd2sWdwQRSZMKRwYtXQrLlml8Y28u6N487ggikiZ1VWXQpEnBvQpH2Vb/sIXVP2yJO4aIpEEtjgwqOaKqc+d4c1Rmv3ppIgCvXtMz5iQisq/U4sigwkI4/HA48MC4k4iIREeFI4MKC3Xin4hUfSocGbJmTXDWuMY3RKSqi7RwmFkfM5ttZnPN7NYk6xua2ZtmNsXMxptZx4R1N5rZdDObZmYvm1ntcPlBZjbGzOaE9w2j/AwVVVQU3KtwiEhVF1nhMLNqwJNAX6A9cJGZtS+12W1Akbt3Ai4BHgv3bQrcAHR3945ANWBAuM+twFh3bwuMDZ/HTlONVMzA41sy8HhdiF0kl0XZ4ugBzHX3+e6+BXgF6Fdqm/YEv/xx91lAKzM7JFxXHahjZtWBusCScHk/4IXw8QvATyP7BCkoLISmTeHgg+NOUrmd0/lQzul8aNwxRCQNURaOpsC3Cc8Xh8sSTQb6A5hZD6Al0MzdvwMeAr4BlgLfu/vocJ9D3H0pQHif9Fe1mV1tZhPMbMKKFSsy9JHKNmmSWhsVsWTtRpas3Rh3DBFJQ5SFw5Is81LP7wMamlkRcD1QCGwLxy36Aa2BQ4F6ZjYwlTd392fdvbu7dy8oKEg5fCqKi4PrjKtwlO/GV4u48dWiuGOISBqiPAFwMZA4v0QzdnU3AeDu64DLAczMgAXh7UxggbuvCNe9AZwAvAQsM7Mm7r7UzJoAyyP8DBUydWpwAScVDhHJB1G2OL4C2ppZazOrSTC4PTJxAzNrEK4DuBL4JCwm3wDHm1ndsKD0AmaG240ELg0fXwq8FeFnqJCSgXGdwyEi+SCyFoe7bzOzQcD7BEdFDXH36WZ2bbj+aeAo4EUz2w7MAK4I131pZsOBScA2gi6sZ8OXvg/4p5ldQVBgLojqM1RUYSE0bAgtWsSdREQkepHOVeXuo4BRpZY9nfB4HNC2jH3/BPwpyfJVBC2QSqNkYNySjeqIiFQxmuQwTVu3BmMcgwbFnSQ3XHXyYXFHEJE0qXCkadYs2LxZ4xsV1bv9IeVvJCKVmuaqSpPOGE/NvBUbmLdiQ9wxRCQNanGkadIkqFsXjjgi7iS54bY3pgK6HodILlOLI02FhdCpE1SrFncSEZHsUOFIw44dway46qYSkXyiwpGGBQtg3ToNjItIflHhSMOkScG9Whwikk80OJ6GwkKoXh06dix/Wwlcf1rS8z1FJIeocKShsBDat4dateJOkjtOats47ggikiZ1Ve0j96CrSuMbqZm+5HumL/k+7hgikgYVjn20dCksX67xjVTd9fYM7np7RtwxRCQNKhz7SGeMi0i+UuHYRyWFo3PneHOIiGSbCsc+KiyEtm3hwAPjTiIikl0qHPuo5BocIiL5Rofj7oM1a2DhQrjmmriT5J5b+rSLO4KIpEmFYx8UFQX3anGkrlvLg+KOICJpUlfVPtARVftu4qLVTFy0Ou4YIpIGFY59UFgITZvCwQfHnST3PPDebB54b3bcMUQkDSoc+0AD4yKSz1Q4UlRcHFxnXIVDRPKVCkeKpk4NLuCkwiEi+UqFI0UlA+Oa3FBE8pUOx03RpEnQsCG0aBF3ktz0x3Paxx1BRNKkwpGiwsKgm8os7iS5qcOh9eOOICJpUldVCrZuDcY4NL6x7z6bs5LP5qyMO4aIpEEtjhTMmgWbN2t8Ix1PfDgH0JUARXKZWhwpmDQpuFeLQ0TymQpHCgoLoW5dOOKIuJOIiMRHhSMFhYXQqRNUqxZ3EhGR+KhwVNCOHcGsuBrfEJF8p8HxCpo/H9at0/hGuu7pf3TcEUQkTSocFaSp1DOjTcH+cUcQkTSpq6qCCguhenXo2DHuJLntgxnL+GDGsrhjiEga1OKooMJC6NABatWKO0lue+7T+QD0bn9IzElEZF+pxVEB7roGh4hICRWOCli6FJYvV+EQEQEVjgrRwLiIyC4qHBVQUjg6d443h4hIZaDB8QooLIS2beHAA+NOkvv+emGXuCOISJpUOCpg0iTo0SPuFFXDoQ3qxB1BRNKkrqpyrFkDCxdqfCNT3p68hLcnL4k7hoikQS2OchQVBfcqHJnx0heLADin86ExJxGRfRVpi8PM+pjZbDOba2a3Jlnf0MzeNLMpZjbezDqGy9uZWVHCbZ2ZDQ7X3WFm3yWsOyvKz6AjqkREdhdZi8PMqgFPAqcDi4GvzGyku89I2Ow2oMjdzzOzI8Pte7n7bKBLwut8B7yZsN9f3f2hqLInmjQJmjaFgw/OxruJiFR+UbY4egBz3X2+u28BXgH6ldqmPTAWwN1nAa3MrPRcFL2Aee6+KMKsZSosVGtDRCRRlIWjKfBtwvPF4bJEk4H+AGbWA2gJNCu1zQDg5VLLBoXdW0PMrGGyNzezq81sgplNWLFixT59gOLi4DrjKhwiIrtEWTgsyTIv9fw+oKGZFQHXA4XAtp0vYFYTOBd4LWGfp4A2BF1ZS4GHk725uz/r7t3dvXtBQcE+fYCpU4MLOOniTZnz1MBuPDWwW9wxRCQNUR5VtRhonvC8GbDbcZjuvg64HMDMDFgQ3kr0BSa5+7KEfXY+NrPngHcynjw0aVJwrxZH5hxUr2bcEUQkTVG2OL4C2ppZ67DlMAAYmbiBmTUI1wFcCXwSFpMSF1Gqm8rMmiQ8PQ+YlvHkocJCaNgQWrSI6h3yz2sTvuW1Cd+Wv6GIVFqRtTjcfZuZDQLeB6oBQ9x9upldG65/GjgKeNHMtgMzgCtK9jezugRHZF1T6qUfMLMuBN1eC5Osz5j//m/4xS/AknW6yT4ZPnExABd0b17OliJSWUV6AqC7jwJGlVr2dMLjcUDbMvYtBholWX5xhmOWqXnz4CYiIrtoyhEREUmJCoeIiKREhUNERFKiSQ4lq56/XPPTi+Q6FQ7Jqjo1q8UdQUTSpK4qyaph4xYybNzCuGOISBpUOCSr3pmylHemLI07hoikQYVDRERSosIhIiIpUeEQEZGUqHCIiEhKdDiuZNWr1/SMO4KIpEktDhERSYkKh4iIpESFQ0REUqLCISIiKVHhEBGRlKhwiIhISlQ4REQkJSocIiKSEhUOERFJiQqHiIikRIVDRERSosIhIiIpUeEQEZGUqHCIiEhKzN3jzhA5M1sBLNrH3RsDKzMYJyq5khNyJ6tyZl6uZFXOQEt3Lyi9MC8KRzrMbIK7d487R3lyJSfkTlblzLxcyaqce6euKhERSYkKh4iIpESFo3zPxh2ggnIlJ+ROVuXMvFzJqpx7oTEOERFJiVocIiKSEhWOkJn1MbPZZjbXzG5Nst7M7PFw/RQz6xpDxuZm9pGZzTSz6Wb2myTb/NjMvjezovD2x2znTMiy0MymhjkmJFlfGb7TdgnfVZGZrTOzwaW2ieU7NbMhZrbczKYlLDvIzMaY2ZzwvmEZ++715zkLOR80s1nhv+ubZtagjH33+jOSpax3mNl3Cf++Z5Wxb9zf6asJGReaWVEZ+0b/nbp73t+AasA84DCgJjAZaF9qm7OAdwEDjge+jCFnE6Br+PgA4OskOX8MvBP3dxpmWQg03sv62L/TJD8H/yE4dj327xQ4BegKTEtY9gBwa/j4VuD+Mj7HXn+es5DzDKB6+Pj+ZDkr8jOSpax3AL+twM9GrN9pqfUPA3+M6ztViyPQA5jr7vPdfQvwCtCv1Db9gBc98AXQwMyaZDOkuy9190nh4/XATKBpNjNkWOzfaSm9gHnuvq8ni2aUu38CrC61uB/wQvj4BeCnSXatyM9zpDndfbS7bwuffgE0i+r9U1HGd1oRsX+nJczMgP8HeDmq9y+PCkegKfBtwvPF7PkLuSLbZI2ZtQKOAb5MsrqnmU02s3fNrEN2k+3GgdFmNtHMrk6yvlJ9p8AAyv7PWFm+00PcfSkEf0gAByfZprJ9r78kaFkmU97PSLYMCrvVhpTR/VeZvtOTgWXuPqeM9ZF/pyocAUuyrPThZhXZJivMbH/gdWCwu68rtXoSQVdLZ+AJYESW4yU60d27An2B68zslFLrK9N3WhM4F3gtyerK9J1WRGX6Xv8AbAP+UcYm5f2MZMNTQBugC7CUoBuotErznQIXsffWRuTfqQpHYDHQPOF5M2DJPmwTOTOrQVA0/uHub5Re7+7r3H1D+HgUUMPMGmc5ZkmWJeH9cuBNguZ+okrxnYb6ApPcfVnpFZXpOwWWlXTnhffLk2xTKb5XM7sUOBv4hYed76VV4Gckcu6+zN23u/sO4LkyMlSW77Q60B94taxtsvGdqnAEvgLamlnr8C/PAcDIUtuMBC4JjwQ6Hvi+pMsgW8K+zb8DM939kTK2+VG4HWbWg+DfeFX2Uu7MUc/MDih5TDBYOq3UZrF/pwnK/CuusnynoZHApeHjS4G3kmxTkZ/nSJlZH+B3wLnuXlzGNhX5GYlcqXG188rIEPt3GuoNzHL3xclWZu07jXLkPZduBEf4fE1w5MQfwmXXAteGjw14Mlw/FegeQ8aTCJrHU4Ci8HZWqZyDgOkER318AZwQ0/d5WJhhcpinUn6nYY66BIWgfsKy2L9TgkK2FNhK8BfvFUAjYCwwJ7w/KNz2UGDU3n6es5xzLsGYQMnP6dOlc5b1MxJD1mHhz98UgmLQpDJ+p+Hy50t+LhO2zfp3qjPHRUQkJeqqEhGRlKhwiIhISlQ4REQkJSocIiKSEhUOERFJiQqHSAaY2fZwNtJpZvaamdXdy7bnlje7qpm1MrOfZz6pSPpUOEQyY6O7d3H3jsAWgvNAknL3ke5+Xzmv1wpQ4ZBKSYVDJPM+BQ634NoZI8LJ874ws04AZnaZmf0tfPy8Bdck+dzM5pvZ+eFr3AecHLZibjSzDmY2Pnw+xczaxvTZRFQ4RDIpnEuoL8GZyHcChe7eCbgNeLGM3ZoQzApwNkHBgOBaG5+GrZi/ErRgHnP3LkB3grOJRWJRPe4AIlVEnYQrsn1KMKfYl8DPANz9QzNrZGb1k+w7woMJ9maY2SFlvP444A9m1gx4w8ueUlskciocIpmxMWwN7FQyMWIpyeb42Zy4W7IXd/f/z8y+BH4CvG9mV7r7h/saViQd6qoSic4nwC8guG45sNL3vH5KWdYTXB6YcP/DgPnu/jjBRHydMppUJAVqcYhE5w5gqJlNAYrZNR16RUwBtpnZZIIZUWsDA81sK8F10e/KbFSRitPsuCIikhJ1VYmISEpUOEREJCUqHCIikhIVDhERSYkKh4iIpESFQ0REUqLCISIiKVHhEBGRlPz/G+lIREFzq0YAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 432x432 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Knee Locator k = 7\n"
     ]
    }
   ],
   "source": [
    "from kneed import KneeLocator\n",
    "\n",
    "i = np.arange(len(var_ratio))\n",
    "variance_ratio= list(var_ratio.values())\n",
    "components=  list(var_ratio.keys())\n",
    "knee = KneeLocator(i, variance_ratio, S=1, curve='concave', interp_method='polynomial')\n",
    "\n",
    "fig = plt.figure(figsize=(5, 5))\n",
    "knee.plot_knee()\n",
    "plt.xlabel(\"Points\")\n",
    "plt.ylabel(\"Distance\")\n",
    "plt.show()\n",
    "k= components[knee.knee]\n",
    "print('Knee Locator k =', k)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "id": "f33873e8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reducing the dimensions of the data \n",
    "pca_final=PCA(n_components=7,random_state=42).fit(X_resampled)\n",
    "\n",
    "reduced=pca_final.fit_transform(X_pca)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "id": "4d0a3e1a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Resampling the minority class. The strategy can be changed as required.\n",
    "smt =RandomOverSampler(random_state=42)\n",
    "# Fit the model to generate the data.\n",
    "X_res, y_res = smt.fit_resample(reduced, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "id": "ae4770cb",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train,X_test,y_train,y_test=train_test_split(X_res,y_res,random_state=42,test_size=0.2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "id": "ea6f6346",
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Random Forest\n",
      "Model performance for Training set\n",
      "- Accuracy: 1.0000\n",
      "- F1 score: 1.0000\n",
      "- Precision: 1.0000\n",
      "- Recall: 1.0000\n",
      "- COST: 0.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9956\n",
      "- F1 score: 0.9956\n",
      "- Precision: 0.9957\n",
      "- Recall: 0.9956\n",
      "- COST: 6120.\n",
      "===================================\n",
      "\n",
      "\n",
      "Decision Tree\n",
      "Model performance for Training set\n",
      "- Accuracy: 1.0000\n",
      "- F1 score: 1.0000\n",
      "- Precision: 1.0000\n",
      "- Recall: 1.0000\n",
      "- COST: 0.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9963\n",
      "- F1 score: 0.9963\n",
      "- Precision: 0.9964\n",
      "- Recall: 0.9963\n",
      "- COST: 5100.\n",
      "===================================\n",
      "\n",
      "\n",
      "Gradient Boosting\n",
      "Model performance for Training set\n",
      "- Accuracy: 0.9989\n",
      "- F1 score: 0.9989\n",
      "- Precision: 0.9989\n",
      "- Recall: 0.9989\n",
      "- COST: 6120.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9960\n",
      "- F1 score: 0.9960\n",
      "- Precision: 0.9960\n",
      "- Recall: 0.9960\n",
      "- COST: 5610.\n",
      "===================================\n",
      "\n",
      "\n",
      "K-Neighbors Classifier\n",
      "Model performance for Training set\n",
      "- Accuracy: 0.9933\n",
      "- F1 score: 0.9933\n",
      "- Precision: 0.9935\n",
      "- Recall: 0.9933\n",
      "- COST: 37230.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9912\n",
      "- F1 score: 0.9912\n",
      "- Precision: 0.9915\n",
      "- Recall: 0.9912\n",
      "- COST: 12240.\n",
      "===================================\n",
      "\n",
      "\n",
      "XGBClassifier\n",
      "Model performance for Training set\n",
      "- Accuracy: 1.0000\n",
      "- F1 score: 1.0000\n",
      "- Precision: 1.0000\n",
      "- Recall: 1.0000\n",
      "- COST: 0.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9952\n",
      "- F1 score: 0.9952\n",
      "- Precision: 0.9953\n",
      "- Recall: 0.9952\n",
      "- COST: 6630.\n",
      "===================================\n",
      "\n",
      "\n",
      "CatBoosting Classifier\n",
      "Model performance for Training set\n",
      "- Accuracy: 0.9999\n",
      "- F1 score: 0.9999\n",
      "- Precision: 0.9999\n",
      "- Recall: 0.9999\n",
      "- COST: 510.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9945\n",
      "- F1 score: 0.9945\n",
      "- Precision: 0.9946\n",
      "- Recall: 0.9945\n",
      "- COST: 7650.\n",
      "===================================\n",
      "\n",
      "\n",
      "AdaBoost Classifier\n",
      "Model performance for Training set\n",
      "- Accuracy: 0.5872\n",
      "- F1 score: 0.4905\n",
      "- Precision: 0.5217\n",
      "- Recall: 0.5872\n",
      "- COST: 2304180.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.5928\n",
      "- F1 score: 0.4940\n",
      "- Precision: 0.5286\n",
      "- Recall: 0.5928\n",
      "- COST: 568140.\n",
      "===================================\n",
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model Name</th>\n",
       "      <th>Cost</th>\n",
       "      <th>Train_Recall</th>\n",
       "      <th>Test_Recall</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Decision Tree</td>\n",
       "      <td>5100</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.996345</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Gradient Boosting</td>\n",
       "      <td>5610</td>\n",
       "      <td>0.998904</td>\n",
       "      <td>0.995980</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Random Forest</td>\n",
       "      <td>6120</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.995614</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>XGBClassifier</td>\n",
       "      <td>6630</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.995249</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>CatBoosting Classifier</td>\n",
       "      <td>7650</td>\n",
       "      <td>0.999909</td>\n",
       "      <td>0.994518</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>K-Neighbors Classifier</td>\n",
       "      <td>12240</td>\n",
       "      <td>0.993330</td>\n",
       "      <td>0.991228</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>AdaBoost Classifier</td>\n",
       "      <td>568140</td>\n",
       "      <td>0.587171</td>\n",
       "      <td>0.592836</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               Model Name    Cost  Train_Recall  Test_Recall\n",
       "1           Decision Tree    5100      1.000000     0.996345\n",
       "2       Gradient Boosting    5610      0.998904     0.995980\n",
       "0           Random Forest    6120      1.000000     0.995614\n",
       "4           XGBClassifier    6630      1.000000     0.995249\n",
       "5  CatBoosting Classifier    7650      0.999909     0.994518\n",
       "3  K-Neighbors Classifier   12240      0.993330     0.991228\n",
       "6     AdaBoost Classifier  568140      0.587171     0.592836"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "report3 = evaluate_models(X_res, y_res, models)\n",
    "report3"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "7c96c654",
   "metadata": {},
   "source": [
    "# lets do feature importance for our Decision Tree model "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "id": "d2b0d557",
   "metadata": {},
   "outputs": [],
   "source": [
    "model=DecisionTreeClassifier()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "id": "8ca97a3e",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train,X_test,y_train,y_test=train_test_split(X_resampled,y_resampled,test_size=0.2,random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "id": "f49923d9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-5 {color: black;background-color: white;}#sk-container-id-5 pre{padding: 0;}#sk-container-id-5 div.sk-toggleable {background-color: white;}#sk-container-id-5 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-5 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-5 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-5 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-5 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-5 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-5 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-5 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-5 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-5 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-5 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-5 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-5 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-5 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-5 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-5 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-5 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-5 div.sk-item {position: relative;z-index: 1;}#sk-container-id-5 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-5 div.sk-item::before, #sk-container-id-5 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-5 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-5 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-5 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-5 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-5 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-5 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-5 div.sk-label-container {text-align: center;}#sk-container-id-5 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-5 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-5\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>DecisionTreeClassifier()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-15\" type=\"checkbox\" checked><label for=\"sk-estimator-id-15\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">DecisionTreeClassifier</label><div class=\"sk-toggleable__content\"><pre>DecisionTreeClassifier()</pre></div></div></div></div></div>"
      ],
      "text/plain": [
       "DecisionTreeClassifier()"
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit(X_train,y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "id": "e78d6135",
   "metadata": {},
   "outputs": [],
   "source": [
    "importances = model.feature_importances_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "id": "64192da5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1.51517383e-02, 3.27814285e-01, 6.96784270e-03, 3.51186554e-01,\n",
       "       2.39068771e-04, 2.86829964e-01, 0.00000000e+00, 1.03491748e-02,\n",
       "       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n",
       "       1.46137176e-03, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n",
       "       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n",
       "       0.00000000e+00])"
      ]
     },
     "execution_count": 105,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "importances"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "id": "d1a17d06",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature_name</th>\n",
       "      <th>importance</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>age</td>\n",
       "      <td>0.015152</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>sex</td>\n",
       "      <td>0.327814</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>on_thyroxine</td>\n",
       "      <td>0.006968</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>query_on_thyroxine</td>\n",
       "      <td>0.351187</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>on_antithyroid_medication</td>\n",
       "      <td>0.000239</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>sick</td>\n",
       "      <td>0.286830</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>pregnant</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>thyroid_surgery</td>\n",
       "      <td>0.010349</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>I131_treatment</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>query_hypothyroid</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>query_hyperthyroid</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>lithium</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>goitre</td>\n",
       "      <td>0.001461</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>tumor</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>hypopituitary</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>psych</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>TSH</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>T3</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>TT4</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>T4U</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>FTI</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 feature_name  importance\n",
       "0                         age    0.015152\n",
       "1                         sex    0.327814\n",
       "2                on_thyroxine    0.006968\n",
       "3          query_on_thyroxine    0.351187\n",
       "4   on_antithyroid_medication    0.000239\n",
       "5                        sick    0.286830\n",
       "6                    pregnant    0.000000\n",
       "7             thyroid_surgery    0.010349\n",
       "8              I131_treatment    0.000000\n",
       "9           query_hypothyroid    0.000000\n",
       "10         query_hyperthyroid    0.000000\n",
       "11                    lithium    0.000000\n",
       "12                     goitre    0.001461\n",
       "13                      tumor    0.000000\n",
       "14              hypopituitary    0.000000\n",
       "15                      psych    0.000000\n",
       "16                        TSH    0.000000\n",
       "17                         T3    0.000000\n",
       "18                        TT4    0.000000\n",
       "19                        T4U    0.000000\n",
       "20                        FTI    0.000000"
      ]
     },
     "execution_count": 106,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Create a DataFrame with feature names and importances\n",
    "features_df = pd.DataFrame({'feature_name': X.columns, 'importance': importances})\n",
    "features_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "id": "6041f531",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sort the features by importance\n",
    "features_df = features_df.sort_values('importance', ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "id": "9ca56231",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature_name</th>\n",
       "      <th>importance</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>query_on_thyroxine</td>\n",
       "      <td>0.351187</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>sex</td>\n",
       "      <td>0.327814</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>sick</td>\n",
       "      <td>0.286830</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>age</td>\n",
       "      <td>0.015152</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>thyroid_surgery</td>\n",
       "      <td>0.010349</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>on_thyroxine</td>\n",
       "      <td>0.006968</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>goitre</td>\n",
       "      <td>0.001461</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>on_antithyroid_medication</td>\n",
       "      <td>0.000239</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>hypopituitary</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>T4U</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>TT4</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>T3</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>TSH</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>psych</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>query_hyperthyroid</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>tumor</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>lithium</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>query_hypothyroid</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>I131_treatment</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>pregnant</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>FTI</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 feature_name  importance\n",
       "3          query_on_thyroxine    0.351187\n",
       "1                         sex    0.327814\n",
       "5                        sick    0.286830\n",
       "0                         age    0.015152\n",
       "7             thyroid_surgery    0.010349\n",
       "2                on_thyroxine    0.006968\n",
       "12                     goitre    0.001461\n",
       "4   on_antithyroid_medication    0.000239\n",
       "14              hypopituitary    0.000000\n",
       "19                        T4U    0.000000\n",
       "18                        TT4    0.000000\n",
       "17                         T3    0.000000\n",
       "16                        TSH    0.000000\n",
       "15                      psych    0.000000\n",
       "10         query_hyperthyroid    0.000000\n",
       "13                      tumor    0.000000\n",
       "11                    lithium    0.000000\n",
       "9           query_hypothyroid    0.000000\n",
       "8              I131_treatment    0.000000\n",
       "6                    pregnant    0.000000\n",
       "20                        FTI    0.000000"
      ]
     },
     "execution_count": 108,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "features_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "id": "48bafaca",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Top 10 features:\n",
      "['query_on_thyroxine', 'sex', 'sick', 'age', 'thyroid_surgery', 'on_thyroxine', 'goitre', 'on_antithyroid_medication', 'hypopituitary', 'T4U']\n"
     ]
    }
   ],
   "source": [
    "# Get the top n features\n",
    "n = 10\n",
    "top_n_features = list(features_df['feature_name'][:n])\n",
    "\n",
    "# Print the top n features\n",
    "print(f'Top {n} features:')\n",
    "print(top_n_features)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "916a7027",
   "metadata": {},
   "source": [
    "# lets try to build model on selected features from RFECV and check performance of train and test dataset "
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "11996420",
   "metadata": {},
   "source": [
    "selected features are Index(['sex', 'on_thyroxine', 'query_on_thyroxine',\n",
    "       'on_antithyroid_medication', 'sick', 'pregnant', 'I131_treatment',\n",
    "       'tumor', 'hypopituitary', 'psych', 'TSH', 'T3', 'TT4', 'T4U', 'FTI'],\n",
    "      dtype='object')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "id": "4ab08667",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['sex', 'on_thyroxine', 'query_on_thyroxine',\n",
       "       'on_antithyroid_medication', 'sick', 'pregnant', 'I131_treatment',\n",
       "       'tumor', 'hypopituitary', 'psych', 'TSH', 'T3', 'TT4', 'T4U', 'FTI'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 110,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X.columns[[1,2,3,4,5,6,8,13,14,15,16,17,18,19,20]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "id": "d4477647",
   "metadata": {},
   "outputs": [],
   "source": [
    "df1=pd.read_csv(\"Thyroid_EDA.csv\",usecols=['sex', 'on_thyroxine', 'query_on_thyroxine',\n",
    "       'on_antithyroid_medication', 'sick', 'pregnant', 'I131_treatment',\n",
    "       'tumor', 'hypopituitary', 'psych', 'TSH', 'T3', 'TT4', 'T4U', 'FTI','Class'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "id": "e0fca741",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3711, 16)"
      ]
     },
     "execution_count": 112,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "id": "5c8b7854",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sex</th>\n",
       "      <th>on_thyroxine</th>\n",
       "      <th>query_on_thyroxine</th>\n",
       "      <th>on_antithyroid_medication</th>\n",
       "      <th>sick</th>\n",
       "      <th>pregnant</th>\n",
       "      <th>I131_treatment</th>\n",
       "      <th>tumor</th>\n",
       "      <th>hypopituitary</th>\n",
       "      <th>psych</th>\n",
       "      <th>TSH</th>\n",
       "      <th>T3</th>\n",
       "      <th>TT4</th>\n",
       "      <th>T4U</th>\n",
       "      <th>FTI</th>\n",
       "      <th>Class</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>F</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>1.30</td>\n",
       "      <td>2.5</td>\n",
       "      <td>125.0</td>\n",
       "      <td>1.14</td>\n",
       "      <td>109.0</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>F</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>4.10</td>\n",
       "      <td>2.0</td>\n",
       "      <td>102.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>0.98</td>\n",
       "      <td>NaN</td>\n",
       "      <td>109.0</td>\n",
       "      <td>0.91</td>\n",
       "      <td>120.0</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>F</td>\n",
       "      <td>t</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>0.16</td>\n",
       "      <td>1.9</td>\n",
       "      <td>175.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>F</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>0.72</td>\n",
       "      <td>1.2</td>\n",
       "      <td>61.0</td>\n",
       "      <td>0.87</td>\n",
       "      <td>70.0</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  sex on_thyroxine query_on_thyroxine on_antithyroid_medication sick pregnant  \\\n",
       "0   F            f                  f                         f    f        f   \n",
       "1   F            f                  f                         f    f        f   \n",
       "2   M            f                  f                         f    f        f   \n",
       "3   F            t                  f                         f    f        f   \n",
       "4   F            f                  f                         f    f        f   \n",
       "\n",
       "  I131_treatment tumor hypopituitary psych   TSH   T3    TT4   T4U    FTI  \\\n",
       "0              f     f             f     f  1.30  2.5  125.0  1.14  109.0   \n",
       "1              f     f             f     f  4.10  2.0  102.0   NaN    NaN   \n",
       "2              f     f             f     f  0.98  NaN  109.0  0.91  120.0   \n",
       "3              f     f             f     f  0.16  1.9  175.0   NaN    NaN   \n",
       "4              f     f             f     f  0.72  1.2   61.0  0.87   70.0   \n",
       "\n",
       "      Class  \n",
       "0  negative  \n",
       "1  negative  \n",
       "2  negative  \n",
       "3  negative  \n",
       "4  negative  "
      ]
     },
     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "id": "2dad7827",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['TSH', 'T3', 'TT4', 'T4U', 'FTI']\n"
     ]
    }
   ],
   "source": [
    "Num_cols=[cols for cols in df1.columns if df1[cols].dtype!='O']\n",
    "print(Num_cols)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "id": "9e5ebc05",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['sex', 'on_thyroxine', 'query_on_thyroxine', 'on_antithyroid_medication', 'sick', 'pregnant', 'I131_treatment', 'tumor', 'hypopituitary', 'psych', 'Class']\n"
     ]
    }
   ],
   "source": [
    "Cat_cols=[cols for cols in df1.columns if df1[cols].dtype=='O']\n",
    "print(Cat_cols)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "id": "207c0e2f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(3711, 15)\n"
     ]
    }
   ],
   "source": [
    "#lets do separation of target variable \n",
    "X=df1.drop('Class',axis=1)\n",
    "print(X.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "id": "7e29e5af",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Lets do transformation pipeline for Numerical and Categorical Columns \n",
    "numeric_transformer =Pipeline(steps=[\n",
    "                        ('imputer',SimpleImputer(strategy='median',missing_values=np.nan)),\n",
    "                        ('robust_scaler',RobustScaler())])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "id": "dcd95868",
   "metadata": {},
   "outputs": [],
   "source": [
    "categorical_transformer = Pipeline(steps=[\n",
    "                        ('imputer', SimpleImputer(strategy='most_frequent')),\n",
    "                        ('onehot', OneHotEncoder(drop='first',handle_unknown='ignore'))])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "id": "c8ebb780",
   "metadata": {},
   "outputs": [],
   "source": [
    "Cat_cols.remove('Class')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "id": "8bcf580e",
   "metadata": {},
   "outputs": [],
   "source": [
    " preprocessor_final = ColumnTransformer([\n",
    "                            ('num',numeric_transformer,Num_cols),\n",
    "                            ('cat', categorical_transformer, Cat_cols)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "id": "1bc863f8",
   "metadata": {},
   "outputs": [],
   "source": [
    "step2=DecisionTreeClassifier()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "id": "ee0638c6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-6 {color: black;background-color: white;}#sk-container-id-6 pre{padding: 0;}#sk-container-id-6 div.sk-toggleable {background-color: white;}#sk-container-id-6 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-6 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-6 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-6 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-6 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-6 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-6 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-6 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-6 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-6 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-6 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-6 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-6 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-6 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-6 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-6 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-6 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-6 div.sk-item {position: relative;z-index: 1;}#sk-container-id-6 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-6 div.sk-item::before, #sk-container-id-6 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-6 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-6 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-6 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-6 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-6 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-6 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-6 div.sk-label-container {text-align: center;}#sk-container-id-6 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-6 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-6\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;preprocessor1&#x27;,\n",
       "                 ColumnTransformer(transformers=[(&#x27;num&#x27;,\n",
       "                                                  Pipeline(steps=[(&#x27;imputer&#x27;,\n",
       "                                                                   SimpleImputer(strategy=&#x27;median&#x27;)),\n",
       "                                                                  (&#x27;robust_scaler&#x27;,\n",
       "                                                                   RobustScaler())]),\n",
       "                                                  [&#x27;TSH&#x27;, &#x27;T3&#x27;, &#x27;TT4&#x27;, &#x27;T4U&#x27;,\n",
       "                                                   &#x27;FTI&#x27;]),\n",
       "                                                 (&#x27;cat&#x27;,\n",
       "                                                  Pipeline(steps=[(&#x27;imputer&#x27;,\n",
       "                                                                   SimpleImputer(strategy=&#x27;most_frequent&#x27;)),\n",
       "                                                                  (&#x27;onehot&#x27;,\n",
       "                                                                   OneHotEncoder(drop=&#x27;first&#x27;,\n",
       "                                                                                 handle_unknown=&#x27;ignore&#x27;))]),\n",
       "                                                  [&#x27;sex&#x27;, &#x27;on_thyroxine&#x27;,\n",
       "                                                   &#x27;query_on_thyroxine&#x27;,\n",
       "                                                   &#x27;on_antithyroid_medication&#x27;,\n",
       "                                                   &#x27;sick&#x27;, &#x27;pregnant&#x27;,\n",
       "                                                   &#x27;I131_treatment&#x27;, &#x27;tumor&#x27;,\n",
       "                                                   &#x27;hypopituitary&#x27;,\n",
       "                                                   &#x27;psych&#x27;])])),\n",
       "                (&#x27;step2&#x27;, DecisionTreeClassifier())])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-16\" type=\"checkbox\" ><label for=\"sk-estimator-id-16\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;preprocessor1&#x27;,\n",
       "                 ColumnTransformer(transformers=[(&#x27;num&#x27;,\n",
       "                                                  Pipeline(steps=[(&#x27;imputer&#x27;,\n",
       "                                                                   SimpleImputer(strategy=&#x27;median&#x27;)),\n",
       "                                                                  (&#x27;robust_scaler&#x27;,\n",
       "                                                                   RobustScaler())]),\n",
       "                                                  [&#x27;TSH&#x27;, &#x27;T3&#x27;, &#x27;TT4&#x27;, &#x27;T4U&#x27;,\n",
       "                                                   &#x27;FTI&#x27;]),\n",
       "                                                 (&#x27;cat&#x27;,\n",
       "                                                  Pipeline(steps=[(&#x27;imputer&#x27;,\n",
       "                                                                   SimpleImputer(strategy=&#x27;most_frequent&#x27;)),\n",
       "                                                                  (&#x27;onehot&#x27;,\n",
       "                                                                   OneHotEncoder(drop=&#x27;first&#x27;,\n",
       "                                                                                 handle_unknown=&#x27;ignore&#x27;))]),\n",
       "                                                  [&#x27;sex&#x27;, &#x27;on_thyroxine&#x27;,\n",
       "                                                   &#x27;query_on_thyroxine&#x27;,\n",
       "                                                   &#x27;on_antithyroid_medication&#x27;,\n",
       "                                                   &#x27;sick&#x27;, &#x27;pregnant&#x27;,\n",
       "                                                   &#x27;I131_treatment&#x27;, &#x27;tumor&#x27;,\n",
       "                                                   &#x27;hypopituitary&#x27;,\n",
       "                                                   &#x27;psych&#x27;])])),\n",
       "                (&#x27;step2&#x27;, DecisionTreeClassifier())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-17\" type=\"checkbox\" ><label for=\"sk-estimator-id-17\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">preprocessor1: ColumnTransformer</label><div class=\"sk-toggleable__content\"><pre>ColumnTransformer(transformers=[(&#x27;num&#x27;,\n",
       "                                 Pipeline(steps=[(&#x27;imputer&#x27;,\n",
       "                                                  SimpleImputer(strategy=&#x27;median&#x27;)),\n",
       "                                                 (&#x27;robust_scaler&#x27;,\n",
       "                                                  RobustScaler())]),\n",
       "                                 [&#x27;TSH&#x27;, &#x27;T3&#x27;, &#x27;TT4&#x27;, &#x27;T4U&#x27;, &#x27;FTI&#x27;]),\n",
       "                                (&#x27;cat&#x27;,\n",
       "                                 Pipeline(steps=[(&#x27;imputer&#x27;,\n",
       "                                                  SimpleImputer(strategy=&#x27;most_frequent&#x27;)),\n",
       "                                                 (&#x27;onehot&#x27;,\n",
       "                                                  OneHotEncoder(drop=&#x27;first&#x27;,\n",
       "                                                                handle_unknown=&#x27;ignore&#x27;))]),\n",
       "                                 [&#x27;sex&#x27;, &#x27;on_thyroxine&#x27;, &#x27;query_on_thyroxine&#x27;,\n",
       "                                  &#x27;on_antithyroid_medication&#x27;, &#x27;sick&#x27;,\n",
       "                                  &#x27;pregnant&#x27;, &#x27;I131_treatment&#x27;, &#x27;tumor&#x27;,\n",
       "                                  &#x27;hypopituitary&#x27;, &#x27;psych&#x27;])])</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-18\" type=\"checkbox\" ><label for=\"sk-estimator-id-18\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">num</label><div class=\"sk-toggleable__content\"><pre>[&#x27;TSH&#x27;, &#x27;T3&#x27;, &#x27;TT4&#x27;, &#x27;T4U&#x27;, &#x27;FTI&#x27;]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-19\" type=\"checkbox\" ><label for=\"sk-estimator-id-19\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SimpleImputer</label><div class=\"sk-toggleable__content\"><pre>SimpleImputer(strategy=&#x27;median&#x27;)</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-20\" type=\"checkbox\" ><label for=\"sk-estimator-id-20\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RobustScaler</label><div class=\"sk-toggleable__content\"><pre>RobustScaler()</pre></div></div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-21\" type=\"checkbox\" ><label for=\"sk-estimator-id-21\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">cat</label><div class=\"sk-toggleable__content\"><pre>[&#x27;sex&#x27;, &#x27;on_thyroxine&#x27;, &#x27;query_on_thyroxine&#x27;, &#x27;on_antithyroid_medication&#x27;, &#x27;sick&#x27;, &#x27;pregnant&#x27;, &#x27;I131_treatment&#x27;, &#x27;tumor&#x27;, &#x27;hypopituitary&#x27;, &#x27;psych&#x27;]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-22\" type=\"checkbox\" ><label for=\"sk-estimator-id-22\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SimpleImputer</label><div class=\"sk-toggleable__content\"><pre>SimpleImputer(strategy=&#x27;most_frequent&#x27;)</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-23\" type=\"checkbox\" ><label for=\"sk-estimator-id-23\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder(drop=&#x27;first&#x27;, handle_unknown=&#x27;ignore&#x27;)</pre></div></div></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-24\" type=\"checkbox\" ><label for=\"sk-estimator-id-24\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">DecisionTreeClassifier</label><div class=\"sk-toggleable__content\"><pre>DecisionTreeClassifier()</pre></div></div></div></div></div></div></div>"
      ],
      "text/plain": [
       "Pipeline(steps=[('preprocessor1',\n",
       "                 ColumnTransformer(transformers=[('num',\n",
       "                                                  Pipeline(steps=[('imputer',\n",
       "                                                                   SimpleImputer(strategy='median')),\n",
       "                                                                  ('robust_scaler',\n",
       "                                                                   RobustScaler())]),\n",
       "                                                  ['TSH', 'T3', 'TT4', 'T4U',\n",
       "                                                   'FTI']),\n",
       "                                                 ('cat',\n",
       "                                                  Pipeline(steps=[('imputer',\n",
       "                                                                   SimpleImputer(strategy='most_frequent')),\n",
       "                                                                  ('onehot',\n",
       "                                                                   OneHotEncoder(drop='first',\n",
       "                                                                                 handle_unknown='ignore'))]),\n",
       "                                                  ['sex', 'on_thyroxine',\n",
       "                                                   'query_on_thyroxine',\n",
       "                                                   'on_antithyroid_medication',\n",
       "                                                   'sick', 'pregnant',\n",
       "                                                   'I131_treatment', 'tumor',\n",
       "                                                   'hypopituitary',\n",
       "                                                   'psych'])])),\n",
       "                ('step2', DecisionTreeClassifier())])"
      ]
     },
     "execution_count": 125,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pipe=Pipeline([('preprocessor1',preprocessor_final),\n",
    "               ('step2',step2)])\n",
    "pipe.fit(X,y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "id": "702a64f8",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_scaled_final=preprocessor_final.fit_transform(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "id": "014bdd23",
   "metadata": {},
   "outputs": [],
   "source": [
    "from imblearn.over_sampling import SMOTE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "id": "4da2ba78",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-8 {color: black;background-color: white;}#sk-container-id-8 pre{padding: 0;}#sk-container-id-8 div.sk-toggleable {background-color: white;}#sk-container-id-8 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-8 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-8 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-8 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-8 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-8 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-8 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-8 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-8 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-8 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-8 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-8 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-8 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-8 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-8 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-8 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-8 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-8 div.sk-item {position: relative;z-index: 1;}#sk-container-id-8 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-8 div.sk-item::before, #sk-container-id-8 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-8 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-8 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-8 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-8 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-8 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-8 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-8 div.sk-label-container {text-align: center;}#sk-container-id-8 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-8 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-8\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LabelEncoder()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-26\" type=\"checkbox\" checked><label for=\"sk-estimator-id-26\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LabelEncoder</label><div class=\"sk-toggleable__content\"><pre>LabelEncoder()</pre></div></div></div></div></div>"
      ],
      "text/plain": [
       "LabelEncoder()"
      ]
     },
     "execution_count": 130,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "le=joblib.load('label_encoder.joblib')\n",
    "le"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "id": "def3d4a5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 1, 1, ..., 1, 1, 1], dtype=int64)"
      ]
     },
     "execution_count": 134,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y=le.fit_transform(y)\n",
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "id": "1b627d51",
   "metadata": {},
   "outputs": [],
   "source": [
    "smt=SMOTE(random_state=42,k_neighbors=1)\n",
    "X_res1,y_res1=smt.fit_resample(X_scaled_final,y,)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "id": "831fa33f",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Random Forest\n",
      "Model performance for Training set\n",
      "- Accuracy: 1.0000\n",
      "- F1 score: 1.0000\n",
      "- Precision: 1.0000\n",
      "- Recall: 1.0000\n",
      "- COST: 0.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9978\n",
      "- F1 score: 0.9978\n",
      "- Precision: 0.9978\n",
      "- Recall: 0.9978\n",
      "- COST: 3060.\n",
      "===================================\n",
      "\n",
      "\n",
      "Decision Tree\n",
      "Model performance for Training set\n",
      "- Accuracy: 1.0000\n",
      "- F1 score: 1.0000\n",
      "- Precision: 1.0000\n",
      "- Recall: 1.0000\n",
      "- COST: 0.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9974\n",
      "- F1 score: 0.9974\n",
      "- Precision: 0.9975\n",
      "- Recall: 0.9974\n",
      "- COST: 3570.\n",
      "===================================\n",
      "\n",
      "\n",
      "Gradient Boosting\n",
      "Model performance for Training set\n",
      "- Accuracy: 0.9994\n",
      "- F1 score: 0.9994\n",
      "- Precision: 0.9994\n",
      "- Recall: 0.9994\n",
      "- COST: 3570.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9938\n",
      "- F1 score: 0.9938\n",
      "- Precision: 0.9938\n",
      "- Recall: 0.9938\n",
      "- COST: 8670.\n",
      "===================================\n",
      "\n",
      "\n",
      "K-Neighbors Classifier\n",
      "Model performance for Training set\n",
      "- Accuracy: 0.9938\n",
      "- F1 score: 0.9938\n",
      "- Precision: 0.9939\n",
      "- Recall: 0.9938\n",
      "- COST: 34680.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9916\n",
      "- F1 score: 0.9916\n",
      "- Precision: 0.9917\n",
      "- Recall: 0.9916\n",
      "- COST: 11730.\n",
      "===================================\n",
      "\n",
      "\n",
      "XGBClassifier\n",
      "Model performance for Training set\n",
      "- Accuracy: 1.0000\n",
      "- F1 score: 1.0000\n",
      "- Precision: 1.0000\n",
      "- Recall: 1.0000\n",
      "- COST: 0.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9978\n",
      "- F1 score: 0.9978\n",
      "- Precision: 0.9978\n",
      "- Recall: 0.9978\n",
      "- COST: 3060.\n",
      "===================================\n",
      "\n",
      "\n",
      "CatBoosting Classifier\n",
      "Model performance for Training set\n",
      "- Accuracy: 0.9998\n",
      "- F1 score: 0.9998\n",
      "- Precision: 0.9998\n",
      "- Recall: 0.9998\n",
      "- COST: 1020.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9974\n",
      "- F1 score: 0.9974\n",
      "- Precision: 0.9975\n",
      "- Recall: 0.9974\n",
      "- COST: 3570.\n",
      "===================================\n",
      "\n",
      "\n",
      "AdaBoost Classifier\n",
      "Model performance for Training set\n",
      "- Accuracy: 0.9735\n",
      "- F1 score: 0.9735\n",
      "- Precision: 0.9737\n",
      "- Recall: 0.9735\n",
      "- COST: 147900.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9722\n",
      "- F1 score: 0.9722\n",
      "- Precision: 0.9724\n",
      "- Recall: 0.9722\n",
      "- COST: 38760.\n",
      "===================================\n",
      "\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model Name</th>\n",
       "      <th>Cost</th>\n",
       "      <th>Train_Recall</th>\n",
       "      <th>Test_Recall</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Random Forest</td>\n",
       "      <td>3060</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.997807</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>XGBClassifier</td>\n",
       "      <td>3060</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.997807</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Decision Tree</td>\n",
       "      <td>3570</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.997442</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>CatBoosting Classifier</td>\n",
       "      <td>3570</td>\n",
       "      <td>0.999817</td>\n",
       "      <td>0.997442</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Gradient Boosting</td>\n",
       "      <td>8670</td>\n",
       "      <td>0.999360</td>\n",
       "      <td>0.993787</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>K-Neighbors Classifier</td>\n",
       "      <td>11730</td>\n",
       "      <td>0.993787</td>\n",
       "      <td>0.991594</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>AdaBoost Classifier</td>\n",
       "      <td>38760</td>\n",
       "      <td>0.973501</td>\n",
       "      <td>0.972222</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               Model Name   Cost  Train_Recall  Test_Recall\n",
       "0           Random Forest   3060      1.000000     0.997807\n",
       "4           XGBClassifier   3060      1.000000     0.997807\n",
       "1           Decision Tree   3570      1.000000     0.997442\n",
       "5  CatBoosting Classifier   3570      0.999817     0.997442\n",
       "2       Gradient Boosting   8670      0.999360     0.993787\n",
       "3  K-Neighbors Classifier  11730      0.993787     0.991594\n",
       "6     AdaBoost Classifier  38760      0.973501     0.972222"
      ]
     },
     "execution_count": 136,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "report4=evaluate_models(X_res1,y_res1,models)\n",
    "report4"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "b322afcc",
   "metadata": {},
   "source": [
    "lets try with randomoversampling technique to handle imbalanced daatset with selected features "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "id": "5cc5fe7b",
   "metadata": {},
   "outputs": [],
   "source": [
    "random=RandomOverSampler()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "id": "bf9e9a24",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_res2,y_res2=random.fit_resample(X_scaled_final,y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "id": "d5b5fdc5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Random Forest\n",
      "Model performance for Training set\n",
      "- Accuracy: 1.0000\n",
      "- F1 score: 1.0000\n",
      "- Precision: 1.0000\n",
      "- Recall: 1.0000\n",
      "- COST: 0.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9989\n",
      "- F1 score: 0.9989\n",
      "- Precision: 0.9989\n",
      "- Recall: 0.9989\n",
      "- COST: 1530.\n",
      "===================================\n",
      "\n",
      "\n",
      "Decision Tree\n",
      "Model performance for Training set\n",
      "- Accuracy: 1.0000\n",
      "- F1 score: 1.0000\n",
      "- Precision: 1.0000\n",
      "- Recall: 1.0000\n",
      "- COST: 0.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9993\n",
      "- F1 score: 0.9993\n",
      "- Precision: 0.9993\n",
      "- Recall: 0.9993\n",
      "- COST: 1020.\n",
      "===================================\n",
      "\n",
      "\n",
      "Gradient Boosting\n",
      "Model performance for Training set\n",
      "- Accuracy: 0.9998\n",
      "- F1 score: 0.9998\n",
      "- Precision: 0.9998\n",
      "- Recall: 0.9998\n",
      "- COST: 1020.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9985\n",
      "- F1 score: 0.9985\n",
      "- Precision: 0.9985\n",
      "- Recall: 0.9985\n",
      "- COST: 2040.\n",
      "===================================\n",
      "\n",
      "\n",
      "K-Neighbors Classifier\n",
      "Model performance for Training set\n",
      "- Accuracy: 0.9947\n",
      "- F1 score: 0.9947\n",
      "- Precision: 0.9948\n",
      "- Recall: 0.9947\n",
      "- COST: 29580.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9931\n",
      "- F1 score: 0.9930\n",
      "- Precision: 0.9932\n",
      "- Recall: 0.9931\n",
      "- COST: 9690.\n",
      "===================================\n",
      "\n",
      "\n",
      "XGBClassifier\n",
      "Model performance for Training set\n",
      "- Accuracy: 1.0000\n",
      "- F1 score: 1.0000\n",
      "- Precision: 1.0000\n",
      "- Recall: 1.0000\n",
      "- COST: 0.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9985\n",
      "- F1 score: 0.9985\n",
      "- Precision: 0.9985\n",
      "- Recall: 0.9985\n",
      "- COST: 2040.\n",
      "===================================\n",
      "\n",
      "\n",
      "CatBoosting Classifier\n",
      "Model performance for Training set\n",
      "- Accuracy: 1.0000\n",
      "- F1 score: 1.0000\n",
      "- Precision: 1.0000\n",
      "- Recall: 1.0000\n",
      "- COST: 0.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.9985\n",
      "- F1 score: 0.9985\n",
      "- Precision: 0.9985\n",
      "- Recall: 0.9985\n",
      "- COST: 2040.\n",
      "===================================\n",
      "\n",
      "\n",
      "AdaBoost Classifier\n",
      "Model performance for Training set\n",
      "- Accuracy: 0.8597\n",
      "- F1 score: 0.8518\n",
      "- Precision: 0.9054\n",
      "- Recall: 0.8597\n",
      "- COST: 782850.\n",
      "----------------------------------\n",
      "Model performance for Test set\n",
      "- Accuracy: 0.8692\n",
      "- F1 score: 0.8616\n",
      "- Precision: 0.9096\n",
      "- Recall: 0.8692\n",
      "- COST: 182580.\n",
      "===================================\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "report5=evaluate_models(X_res2,y_res2,models)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "id": "19a26b6a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model Name</th>\n",
       "      <th>Cost</th>\n",
       "      <th>Train_Recall</th>\n",
       "      <th>Test_Recall</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Decision Tree</td>\n",
       "      <td>1020</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.999269</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Random Forest</td>\n",
       "      <td>1530</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.998904</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Gradient Boosting</td>\n",
       "      <td>2040</td>\n",
       "      <td>0.999817</td>\n",
       "      <td>0.998538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>XGBClassifier</td>\n",
       "      <td>2040</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.998538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>CatBoosting Classifier</td>\n",
       "      <td>2040</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.998538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>K-Neighbors Classifier</td>\n",
       "      <td>9690</td>\n",
       "      <td>0.994700</td>\n",
       "      <td>0.993056</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>AdaBoost Classifier</td>\n",
       "      <td>182580</td>\n",
       "      <td>0.859740</td>\n",
       "      <td>0.869152</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               Model Name    Cost  Train_Recall  Test_Recall\n",
       "1           Decision Tree    1020      1.000000     0.999269\n",
       "0           Random Forest    1530      1.000000     0.998904\n",
       "2       Gradient Boosting    2040      0.999817     0.998538\n",
       "4           XGBClassifier    2040      1.000000     0.998538\n",
       "5  CatBoosting Classifier    2040      1.000000     0.998538\n",
       "3  K-Neighbors Classifier    9690      0.994700     0.993056\n",
       "6     AdaBoost Classifier  182580      0.859740     0.869152"
      ]
     },
     "execution_count": 140,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "report5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "id": "a32b604a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import joblib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "id": "a756db3f",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('df.pkl','wb')as file:\n",
    "    pickle.dump(df1,file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "id": "ec48a3eb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['final_pipe_object.joblib']"
      ]
     },
     "execution_count": 143,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "joblib.dump(pipe,'final_pipe_object.joblib')"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "2e509a87",
   "metadata": {},
   "source": [
    "#Prediction on new dataset with created model "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "id": "77f6c08c",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_pickle('df.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "id": "9eea5f5c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sex</th>\n",
       "      <th>on_thyroxine</th>\n",
       "      <th>query_on_thyroxine</th>\n",
       "      <th>on_antithyroid_medication</th>\n",
       "      <th>sick</th>\n",
       "      <th>pregnant</th>\n",
       "      <th>I131_treatment</th>\n",
       "      <th>tumor</th>\n",
       "      <th>hypopituitary</th>\n",
       "      <th>psych</th>\n",
       "      <th>TSH</th>\n",
       "      <th>T3</th>\n",
       "      <th>TT4</th>\n",
       "      <th>T4U</th>\n",
       "      <th>FTI</th>\n",
       "      <th>Class</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>F</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>1.30</td>\n",
       "      <td>2.5</td>\n",
       "      <td>125.0</td>\n",
       "      <td>1.14</td>\n",
       "      <td>109.0</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>F</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>4.10</td>\n",
       "      <td>2.0</td>\n",
       "      <td>102.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>0.98</td>\n",
       "      <td>NaN</td>\n",
       "      <td>109.0</td>\n",
       "      <td>0.91</td>\n",
       "      <td>120.0</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>F</td>\n",
       "      <td>t</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>0.16</td>\n",
       "      <td>1.9</td>\n",
       "      <td>175.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>F</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>0.72</td>\n",
       "      <td>1.2</td>\n",
       "      <td>61.0</td>\n",
       "      <td>0.87</td>\n",
       "      <td>70.0</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  sex on_thyroxine query_on_thyroxine on_antithyroid_medication sick pregnant  \\\n",
       "0   F            f                  f                         f    f        f   \n",
       "1   F            f                  f                         f    f        f   \n",
       "2   M            f                  f                         f    f        f   \n",
       "3   F            t                  f                         f    f        f   \n",
       "4   F            f                  f                         f    f        f   \n",
       "\n",
       "  I131_treatment tumor hypopituitary psych   TSH   T3    TT4   T4U    FTI  \\\n",
       "0              f     f             f     f  1.30  2.5  125.0  1.14  109.0   \n",
       "1              f     f             f     f  4.10  2.0  102.0   NaN    NaN   \n",
       "2              f     f             f     f  0.98  NaN  109.0  0.91  120.0   \n",
       "3              f     f             f     f  0.16  1.9  175.0   NaN    NaN   \n",
       "4              f     f             f     f  0.72  1.2   61.0  0.87   70.0   \n",
       "\n",
       "      Class  \n",
       "0  negative  \n",
       "1  negative  \n",
       "2  negative  \n",
       "3  negative  \n",
       "4  negative  "
      ]
     },
     "execution_count": 145,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "id": "25b99fc5",
   "metadata": {},
   "outputs": [],
   "source": [
    "model=joblib.load('final_pipe_object.joblib')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "id": "84989422",
   "metadata": {},
   "outputs": [],
   "source": [
    "label_predictor=joblib.load('label_encoder.joblib')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "id": "6c770076",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-9 {color: black;background-color: white;}#sk-container-id-9 pre{padding: 0;}#sk-container-id-9 div.sk-toggleable {background-color: white;}#sk-container-id-9 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-9 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-9 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-9 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-9 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-9 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-9 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-9 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-9 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-9 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-9 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-9 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-9 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-9 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-9 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-9 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-9 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-9 div.sk-item {position: relative;z-index: 1;}#sk-container-id-9 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-9 div.sk-item::before, #sk-container-id-9 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-9 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-9 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-9 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-9 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-9 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-9 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-9 div.sk-label-container {text-align: center;}#sk-container-id-9 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-9 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-9\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Pipeline(steps=[(&#x27;preprocessor1&#x27;,\n",
       "                 ColumnTransformer(transformers=[(&#x27;num&#x27;,\n",
       "                                                  Pipeline(steps=[(&#x27;imputer&#x27;,\n",
       "                                                                   SimpleImputer(strategy=&#x27;median&#x27;)),\n",
       "                                                                  (&#x27;robust_scaler&#x27;,\n",
       "                                                                   RobustScaler())]),\n",
       "                                                  [&#x27;TSH&#x27;, &#x27;T3&#x27;, &#x27;TT4&#x27;, &#x27;T4U&#x27;,\n",
       "                                                   &#x27;FTI&#x27;]),\n",
       "                                                 (&#x27;cat&#x27;,\n",
       "                                                  Pipeline(steps=[(&#x27;imputer&#x27;,\n",
       "                                                                   SimpleImputer(strategy=&#x27;most_frequent&#x27;)),\n",
       "                                                                  (&#x27;onehot&#x27;,\n",
       "                                                                   OneHotEncoder(drop=&#x27;first&#x27;,\n",
       "                                                                                 handle_unknown=&#x27;ignore&#x27;))]),\n",
       "                                                  [&#x27;sex&#x27;, &#x27;on_thyroxine&#x27;,\n",
       "                                                   &#x27;query_on_thyroxine&#x27;,\n",
       "                                                   &#x27;on_antithyroid_medication&#x27;,\n",
       "                                                   &#x27;sick&#x27;, &#x27;pregnant&#x27;,\n",
       "                                                   &#x27;I131_treatment&#x27;, &#x27;tumor&#x27;,\n",
       "                                                   &#x27;hypopituitary&#x27;,\n",
       "                                                   &#x27;psych&#x27;])])),\n",
       "                (&#x27;step2&#x27;, DecisionTreeClassifier())])</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-27\" type=\"checkbox\" ><label for=\"sk-estimator-id-27\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Pipeline</label><div class=\"sk-toggleable__content\"><pre>Pipeline(steps=[(&#x27;preprocessor1&#x27;,\n",
       "                 ColumnTransformer(transformers=[(&#x27;num&#x27;,\n",
       "                                                  Pipeline(steps=[(&#x27;imputer&#x27;,\n",
       "                                                                   SimpleImputer(strategy=&#x27;median&#x27;)),\n",
       "                                                                  (&#x27;robust_scaler&#x27;,\n",
       "                                                                   RobustScaler())]),\n",
       "                                                  [&#x27;TSH&#x27;, &#x27;T3&#x27;, &#x27;TT4&#x27;, &#x27;T4U&#x27;,\n",
       "                                                   &#x27;FTI&#x27;]),\n",
       "                                                 (&#x27;cat&#x27;,\n",
       "                                                  Pipeline(steps=[(&#x27;imputer&#x27;,\n",
       "                                                                   SimpleImputer(strategy=&#x27;most_frequent&#x27;)),\n",
       "                                                                  (&#x27;onehot&#x27;,\n",
       "                                                                   OneHotEncoder(drop=&#x27;first&#x27;,\n",
       "                                                                                 handle_unknown=&#x27;ignore&#x27;))]),\n",
       "                                                  [&#x27;sex&#x27;, &#x27;on_thyroxine&#x27;,\n",
       "                                                   &#x27;query_on_thyroxine&#x27;,\n",
       "                                                   &#x27;on_antithyroid_medication&#x27;,\n",
       "                                                   &#x27;sick&#x27;, &#x27;pregnant&#x27;,\n",
       "                                                   &#x27;I131_treatment&#x27;, &#x27;tumor&#x27;,\n",
       "                                                   &#x27;hypopituitary&#x27;,\n",
       "                                                   &#x27;psych&#x27;])])),\n",
       "                (&#x27;step2&#x27;, DecisionTreeClassifier())])</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-28\" type=\"checkbox\" ><label for=\"sk-estimator-id-28\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">preprocessor1: ColumnTransformer</label><div class=\"sk-toggleable__content\"><pre>ColumnTransformer(transformers=[(&#x27;num&#x27;,\n",
       "                                 Pipeline(steps=[(&#x27;imputer&#x27;,\n",
       "                                                  SimpleImputer(strategy=&#x27;median&#x27;)),\n",
       "                                                 (&#x27;robust_scaler&#x27;,\n",
       "                                                  RobustScaler())]),\n",
       "                                 [&#x27;TSH&#x27;, &#x27;T3&#x27;, &#x27;TT4&#x27;, &#x27;T4U&#x27;, &#x27;FTI&#x27;]),\n",
       "                                (&#x27;cat&#x27;,\n",
       "                                 Pipeline(steps=[(&#x27;imputer&#x27;,\n",
       "                                                  SimpleImputer(strategy=&#x27;most_frequent&#x27;)),\n",
       "                                                 (&#x27;onehot&#x27;,\n",
       "                                                  OneHotEncoder(drop=&#x27;first&#x27;,\n",
       "                                                                handle_unknown=&#x27;ignore&#x27;))]),\n",
       "                                 [&#x27;sex&#x27;, &#x27;on_thyroxine&#x27;, &#x27;query_on_thyroxine&#x27;,\n",
       "                                  &#x27;on_antithyroid_medication&#x27;, &#x27;sick&#x27;,\n",
       "                                  &#x27;pregnant&#x27;, &#x27;I131_treatment&#x27;, &#x27;tumor&#x27;,\n",
       "                                  &#x27;hypopituitary&#x27;, &#x27;psych&#x27;])])</pre></div></div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-29\" type=\"checkbox\" ><label for=\"sk-estimator-id-29\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">num</label><div class=\"sk-toggleable__content\"><pre>[&#x27;TSH&#x27;, &#x27;T3&#x27;, &#x27;TT4&#x27;, &#x27;T4U&#x27;, &#x27;FTI&#x27;]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-30\" type=\"checkbox\" ><label for=\"sk-estimator-id-30\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SimpleImputer</label><div class=\"sk-toggleable__content\"><pre>SimpleImputer(strategy=&#x27;median&#x27;)</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-31\" type=\"checkbox\" ><label for=\"sk-estimator-id-31\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RobustScaler</label><div class=\"sk-toggleable__content\"><pre>RobustScaler()</pre></div></div></div></div></div></div></div></div><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-32\" type=\"checkbox\" ><label for=\"sk-estimator-id-32\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">cat</label><div class=\"sk-toggleable__content\"><pre>[&#x27;sex&#x27;, &#x27;on_thyroxine&#x27;, &#x27;query_on_thyroxine&#x27;, &#x27;on_antithyroid_medication&#x27;, &#x27;sick&#x27;, &#x27;pregnant&#x27;, &#x27;I131_treatment&#x27;, &#x27;tumor&#x27;, &#x27;hypopituitary&#x27;, &#x27;psych&#x27;]</pre></div></div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-33\" type=\"checkbox\" ><label for=\"sk-estimator-id-33\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SimpleImputer</label><div class=\"sk-toggleable__content\"><pre>SimpleImputer(strategy=&#x27;most_frequent&#x27;)</pre></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-34\" type=\"checkbox\" ><label for=\"sk-estimator-id-34\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">OneHotEncoder</label><div class=\"sk-toggleable__content\"><pre>OneHotEncoder(drop=&#x27;first&#x27;, handle_unknown=&#x27;ignore&#x27;)</pre></div></div></div></div></div></div></div></div></div></div><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-35\" type=\"checkbox\" ><label for=\"sk-estimator-id-35\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">DecisionTreeClassifier</label><div class=\"sk-toggleable__content\"><pre>DecisionTreeClassifier()</pre></div></div></div></div></div></div></div>"
      ],
      "text/plain": [
       "Pipeline(steps=[('preprocessor1',\n",
       "                 ColumnTransformer(transformers=[('num',\n",
       "                                                  Pipeline(steps=[('imputer',\n",
       "                                                                   SimpleImputer(strategy='median')),\n",
       "                                                                  ('robust_scaler',\n",
       "                                                                   RobustScaler())]),\n",
       "                                                  ['TSH', 'T3', 'TT4', 'T4U',\n",
       "                                                   'FTI']),\n",
       "                                                 ('cat',\n",
       "                                                  Pipeline(steps=[('imputer',\n",
       "                                                                   SimpleImputer(strategy='most_frequent')),\n",
       "                                                                  ('onehot',\n",
       "                                                                   OneHotEncoder(drop='first',\n",
       "                                                                                 handle_unknown='ignore'))]),\n",
       "                                                  ['sex', 'on_thyroxine',\n",
       "                                                   'query_on_thyroxine',\n",
       "                                                   'on_antithyroid_medication',\n",
       "                                                   'sick', 'pregnant',\n",
       "                                                   'I131_treatment', 'tumor',\n",
       "                                                   'hypopituitary',\n",
       "                                                   'psych'])])),\n",
       "                ('step2', DecisionTreeClassifier())])"
      ]
     },
     "execution_count": 148,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 176,
   "id": "7e5ab492",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sex</th>\n",
       "      <th>on_thyroxine</th>\n",
       "      <th>query_on_thyroxine</th>\n",
       "      <th>on_antithyroid_medication</th>\n",
       "      <th>sick</th>\n",
       "      <th>pregnant</th>\n",
       "      <th>I131_treatment</th>\n",
       "      <th>tumor</th>\n",
       "      <th>hypopituitary</th>\n",
       "      <th>psych</th>\n",
       "      <th>TSH</th>\n",
       "      <th>T3</th>\n",
       "      <th>TT4</th>\n",
       "      <th>T4U</th>\n",
       "      <th>FTI</th>\n",
       "      <th>Class</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3404</th>\n",
       "      <td>F</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>7.50</td>\n",
       "      <td>1.6</td>\n",
       "      <td>82.0</td>\n",
       "      <td>1.08</td>\n",
       "      <td>77.0</td>\n",
       "      <td>compensated_hypothyroid</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1039</th>\n",
       "      <td>M</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>t</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>0.01</td>\n",
       "      <td>2.4</td>\n",
       "      <td>108.0</td>\n",
       "      <td>0.70</td>\n",
       "      <td>154.0</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     sex on_thyroxine query_on_thyroxine on_antithyroid_medication sick  \\\n",
       "3404   F            f                  f                         f    f   \n",
       "1039   M            f                  f                         f    f   \n",
       "\n",
       "     pregnant I131_treatment tumor hypopituitary psych   TSH   T3    TT4  \\\n",
       "3404        f              f     f             f     f  7.50  1.6   82.0   \n",
       "1039        f              t     f             f     f  0.01  2.4  108.0   \n",
       "\n",
       "       T4U    FTI                    Class  \n",
       "3404  1.08   77.0  compensated_hypothyroid  \n",
       "1039  0.70  154.0                 negative  "
      ]
     },
     "execution_count": 176,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1.sample(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 177,
   "id": "1de8aacb",
   "metadata": {},
   "outputs": [],
   "source": [
    "data={\n",
    "   'sex':'M',\n",
    "    'on_thyroxine':'f',\n",
    "    'query_on_thyroxine':'f',\n",
    "    'on_antithyroid_medication':'f',\n",
    "    'sick':'f',\n",
    "    'pregnant':'f',\n",
    "    'I131_treatment':'f',\n",
    "    'tumor':'f',\n",
    "    'hypopituitary':'f',\n",
    "    'psych':'f',\n",
    "    'TSH':7.50,\n",
    "    'T3':1.6,\n",
    "    'TT4':82.0,\n",
    "    'T4U':1.08,\n",
    "    'FTI':65.0\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 178,
   "id": "bf165b6d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sex</th>\n",
       "      <th>on_thyroxine</th>\n",
       "      <th>query_on_thyroxine</th>\n",
       "      <th>on_antithyroid_medication</th>\n",
       "      <th>sick</th>\n",
       "      <th>pregnant</th>\n",
       "      <th>I131_treatment</th>\n",
       "      <th>tumor</th>\n",
       "      <th>hypopituitary</th>\n",
       "      <th>psych</th>\n",
       "      <th>TSH</th>\n",
       "      <th>T3</th>\n",
       "      <th>TT4</th>\n",
       "      <th>T4U</th>\n",
       "      <th>FTI</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>M</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>7.5</td>\n",
       "      <td>1.6</td>\n",
       "      <td>82.0</td>\n",
       "      <td>1.08</td>\n",
       "      <td>77.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  sex on_thyroxine query_on_thyroxine on_antithyroid_medication sick pregnant  \\\n",
       "0   M            f                  f                         f    f        f   \n",
       "\n",
       "  I131_treatment tumor hypopituitary psych  TSH   T3   TT4   T4U   FTI  \n",
       "0              f     f             f     f  7.5  1.6  82.0  1.08  77.0  "
      ]
     },
     "execution_count": 178,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_test=pd.DataFrame(data,index=[0])\n",
    "df_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "id": "7e3c8f19",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['negative'], dtype=object)"
      ]
     },
     "execution_count": 172,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "label_predictor.inverse_transform([1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 181,
   "id": "6ecc7c3b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'compensated_hypothyroid'"
      ]
     },
     "execution_count": 181,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.predict(df_test)[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 189,
   "id": "518ea85d",
   "metadata": {},
   "outputs": [],
   "source": [
    "data2={\n",
    "   'sex':'F',\n",
    "    'on_thyroxine':'f',\n",
    "    'query_on_thyroxine':'f',\n",
    "    'on_antithyroid_medication':'f',\n",
    "    'sick':'f',\n",
    "    'pregnant':'t',\n",
    "    'I131_treatment':'f',\n",
    "    'tumor':'f',\n",
    "    'hypopituitary':'f',\n",
    "    'psych':'f',\n",
    "    'TSH':7.50,\n",
    "    'T3':1.9,\n",
    "    'TT4':190.0,\n",
    "    'T4U':1.08,\n",
    "    'FTI':65.0\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 190,
   "id": "fc534ddf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sex</th>\n",
       "      <th>on_thyroxine</th>\n",
       "      <th>query_on_thyroxine</th>\n",
       "      <th>on_antithyroid_medication</th>\n",
       "      <th>sick</th>\n",
       "      <th>pregnant</th>\n",
       "      <th>I131_treatment</th>\n",
       "      <th>tumor</th>\n",
       "      <th>hypopituitary</th>\n",
       "      <th>psych</th>\n",
       "      <th>TSH</th>\n",
       "      <th>T3</th>\n",
       "      <th>TT4</th>\n",
       "      <th>T4U</th>\n",
       "      <th>FTI</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>F</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>t</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>f</td>\n",
       "      <td>7.5</td>\n",
       "      <td>1.9</td>\n",
       "      <td>190.0</td>\n",
       "      <td>1.08</td>\n",
       "      <td>65.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  sex on_thyroxine query_on_thyroxine on_antithyroid_medication sick pregnant  \\\n",
       "0   F            f                  f                         f    f        t   \n",
       "\n",
       "  I131_treatment tumor hypopituitary psych  TSH   T3    TT4   T4U   FTI  \n",
       "0              f     f             f     f  7.5  1.9  190.0  1.08  65.0  "
      ]
     },
     "execution_count": 190,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_test=pd.DataFrame(data2,index=[0])\n",
    "df_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 191,
   "id": "5043c259",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'negative'"
      ]
     },
     "execution_count": 191,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.predict(df_test)[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ba8ad802",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "state": {},
    "version_major": 2,
    "version_minor": 0
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
